
Commit cc8dc0c

Add vgf to extension LLM
Signed-off-by: Sebastian Larsson <sebastian.larsson@arm.com>
Change-Id: Ide55a1928215b21689a9732b61b00b9eaf4e952b
Parent: b778155

5 files changed

Lines changed: 93 additions & 3 deletions

examples/models/llama/export_llama_lib.py

Lines changed: 27 additions & 2 deletions
@@ -39,6 +39,7 @@
     get_openvino_partitioner,
     get_qnn_partitioner,
     get_tosa_partitioner,
+    get_vgf_partitioner,
     get_vulkan_partitioner,
     get_xnnpack_partitioner,
 )
@@ -50,6 +51,7 @@
     get_pt2e_quantizers,
     get_qnn_quantizer,
     get_tosa_quantizer,
+    get_vgf_quantizer,
     get_vulkan_quantizer,
 )
 from executorch.util.activation_memory_profiler import generate_memory_trace
@@ -824,6 +826,13 @@ def get_quantizer_and_quant_params(llm_config):
             llm_config.quantization.pt2e_quantize.value,
         )
         quantizers.append(ethosu_quantizer)
+    if llm_config.backend.vgf.enabled and llm_config.quantization.pt2e_quantize:
+        vgf_quantizer = get_vgf_quantizer(
+            llm_config.backend.vgf.compile_spec,
+            llm_config.backend.vgf.compiler_flags,
+            llm_config.quantization.pt2e_quantize.value,
+        )
+        quantizers.append(vgf_quantizer)
     if llm_config.backend.vulkan.enabled and llm_config.quantization.pt2e_quantize:
         assert (
             len(quantizers) == 0
@@ -1013,6 +1022,14 @@ def _to_edge_and_lower_llama_arm(
             )
         )
         modelname = f"ethosu_{modelname}"
+    elif llm_config.backend.vgf.enabled:
+        partitioners.append(
+            get_vgf_partitioner(
+                llm_config.backend.vgf.compile_spec,
+                llm_config.backend.vgf.compiler_flags,
+            )
+        )
+        modelname = f"vgf_{modelname}"
     elif llm_config.backend.tosa.enabled:
         partitioners.append(get_tosa_partitioner(llm_config.backend.tosa.version))
         modelname = f"tosa_{modelname}"
@@ -1336,7 +1353,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager:  # noqa: C901
 
     # export_to_edge
     builder_manager = _prepare_for_llama_export(llm_config)
-    if llm_config.backend.tosa.enabled:
+    if (
+        llm_config.backend.tosa.enabled
+        or llm_config.backend.vgf.enabled
+        or llm_config.backend.ethosu.enabled
+    ):
         builder_manager.skip_dim_order = False
     builder_exported = builder_manager.export()
     builder_exported.run_canonical_optimizations()
@@ -1383,7 +1404,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager:  # noqa: C901
             openvino_device=llm_config.backend.openvino.device,
             verbose=llm_config.debug.verbose,
         )
-    elif llm_config.backend.tosa.enabled or llm_config.backend.ethosu.enabled:
+    elif (
+        llm_config.backend.tosa.enabled
+        or llm_config.backend.ethosu.enabled
+        or llm_config.backend.vgf.enabled
+    ):
         builder = _to_edge_and_lower_llama_arm(
             builder_exported,
             modelname,
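
A minimal sketch (not part of the commit) of how a VGF-enabled config reaches the new branches above. The module paths are assumed from the file locations in this commit, and the config values are illustrative only:

from executorch.examples.models.llama.export_llama_lib import (
    get_quantizer_and_quant_params,
)
from executorch.extension.llm.export.config.llm_config import LlmConfig, Pt2eQuantize

llm_config = LlmConfig()
llm_config.backend.vgf.enabled = True  # routes export through the new vgf branches
llm_config.quantization.pt2e_quantize = Pt2eQuantize.vgf_8a8w

# get_quantizer_and_quant_params appends a VgfQuantizer built from the vgf
# compile_spec/compiler_flags; _to_edge_and_lower_llama_arm later picks the
# VGF partitioner and prefixes the model name with "vgf_".
pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(llm_config)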

examples/models/llama/tests/test_export_llama_lib.py

Lines changed: 18 additions & 0 deletions
@@ -13,13 +13,15 @@
     from executorch.backends.arm.quantizer.arm_quantizer import (
         EthosUQuantizer,
         TOSAQuantizer,
+        VgfQuantizer,
     )
 
     HAS_ARM_BACKEND = True
 except ImportError:
     HAS_ARM_BACKEND = False
     EthosUQuantizer = None
     TOSAQuantizer = None
+    VgfQuantizer = None
 
 from executorch.examples.models.llama.export_llama_lib import (
     _export_llama,
@@ -93,3 +95,19 @@ def test_get_quantizer_and_quant_params_returns_ethosu_quantizer(self):
         self.assertIsNone(quant_dtype)
         self.assertEqual(len(quantizers), 1)
         self.assertIsInstance(quantizers[0], EthosUQuantizer)
+
+    @unittest.skipUnless(HAS_ARM_BACKEND, "ARM backend not available")
+    def test_get_quantizer_and_quant_params_returns_vgf_quantizer(self):
+        llm_config = LlmConfig()
+        llm_config.backend.vgf.enabled = True
+        llm_config.backend.vgf.compile_spec = "TOSA-1.0+INT"
+        llm_config.quantization.pt2e_quantize = Pt2eQuantize.vgf_8a8w
+
+        pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(
+            llm_config
+        )
+
+        self.assertIsNone(pt2e_quant_params)
+        self.assertIsNone(quant_dtype)
+        self.assertEqual(len(quantizers), 1)
+        self.assertIsInstance(quantizers[0], VgfQuantizer)
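
Assuming the repository root is on PYTHONPATH and the Arm backend is installed (otherwise the skipUnless guard skips the case), the new test can be run with the standard unittest runner, for example:

python -m unittest examples.models.llama.tests.test_export_llama_lib -k vgf_quantizer -v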

extension/llm/export/config/llm_config.py

Lines changed: 13 additions & 0 deletions
@@ -349,6 +349,7 @@ class Pt2eQuantize(str, Enum):
     vulkan_8w = "vulkan_8w"
     tosa_8a8w = "tosa_8a8w"
     ethosu_8a8w = "ethosu_8a8w"
+    vgf_8a8w = "vgf_8a8w"
 
 
 class SpinQuant(str, Enum):
@@ -558,6 +559,17 @@ class EthosUConfig:
     system_config: str = "default"
 
 
+@dataclass
+class VgfConfig:
+    """
+    Configures the VGF backend.
+    """
+
+    enabled: bool = False
+    compile_spec: Optional[str] = "TOSA-1.0+INT"
+    compiler_flags: List[str] = field(default_factory=list)
+
+
 @dataclass
 class BackendConfig:
     """
@@ -574,6 +586,7 @@ class BackendConfig:
     torchao: TorchAOKernelsConfig = field(default_factory=TorchAOKernelsConfig)
     tosa: TosaConfig = field(default_factory=TosaConfig)
     ethosu: EthosUConfig = field(default_factory=EthosUConfig)
+    vgf: VgfConfig = field(default_factory=VgfConfig)
 
 
 ################################################################################
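
A minimal sketch (not part of the commit) of how the new config surfaces to callers; the module path is assumed from the file location, and the compiler flag value is hypothetical:

from executorch.extension.llm.export.config.llm_config import BackendConfig

backend = BackendConfig()
# VgfConfig defaults: enabled=False, compile_spec="TOSA-1.0+INT", compiler_flags=[]
backend.vgf.enabled = True
backend.vgf.compiler_flags = ["--example-flag"]  # hypothetical flag, forwarded to VgfCompileSpec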

extension/llm/export/partitioner_lib.py

Lines changed: 12 additions & 1 deletion
@@ -5,7 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Optional
+from typing import List, Optional
 
 
 def get_xnnpack_partitioner(dynamic_quant_only_partitioner: bool = True):
@@ -255,3 +255,14 @@ def get_ethosu_partitioner(target: str):
     compile_spec = EthosUCompileSpec(target)
 
     return EthosUPartitioner(compile_spec)
+
+
+def get_vgf_partitioner(
+    compile_spec: Optional[str], compiler_flags: Optional[List[str]]
+):
+    from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec
+    from executorch.backends.arm.vgf.partitioner import VgfPartitioner
+
+    compile_spec_obj = VgfCompileSpec(compile_spec, compiler_flags)
+
+    return VgfPartitioner(compile_spec_obj)
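
A minimal usage sketch (not part of the commit) for the new helper; it assumes the Arm VGF backend (executorch.backends.arm.vgf) is importable and that the module path below matches the file location:

from executorch.extension.llm.export.partitioner_lib import get_vgf_partitioner

# "TOSA-1.0+INT" matches the VgfConfig default compile_spec; the (empty) list of
# compiler flags is passed through to VgfCompileSpec unchanged.
partitioner = get_vgf_partitioner("TOSA-1.0+INT", [])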

extension/llm/export/quantizer_lib.py

Lines changed: 23 additions & 0 deletions
@@ -361,3 +361,26 @@ def get_ethosu_quantizer(
         raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")
 
     return quantizer
+
+
+def get_vgf_quantizer(
+    compile_spec: Optional[str],
+    compiler_flags: Optional[List[str]],
+    pt2e_quantize: str,
+):
+    from executorch.backends.arm.quantizer.arm_quantizer import (
+        get_symmetric_quantization_config,
+        VgfQuantizer,
+    )
+    from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec
+
+    compile_spec_obj = VgfCompileSpec(compile_spec, compiler_flags)
+
+    quantizer = VgfQuantizer(compile_spec_obj)
+
+    if pt2e_quantize == "vgf_8a8w":
+        quantizer.set_global(get_symmetric_quantization_config())
+    else:
+        raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")
+
+    return quantizer
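
A minimal usage sketch (not part of the commit) for the new quantizer helper, under the same assumptions as above (Arm backend installed, module path inferred from the file location):

from executorch.extension.llm.export.quantizer_lib import get_vgf_quantizer

# Only "vgf_8a8w" is accepted; any other pt2e_quantize value raises ValueError,
# mirroring get_ethosu_quantizer.
quantizer = get_vgf_quantizer("TOSA-1.0+INT", [], "vgf_8a8w")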
