Skip to content

Commit b778155

Browse files
Add Ethos-U to LLM extension (#17700)
Signed-off-by: Sebastian Larsson <sebastian.larsson@arm.com>
1 parent e95555a commit b778155

5 files changed

Lines changed: 91 additions & 9 deletions

File tree

examples/models/llama/export_llama_lib.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from executorch.extension.llm.export.config.llm_config import LlmConfig
3535
from executorch.extension.llm.export.partitioner_lib import (
3636
get_coreml_partitioner,
37+
get_ethosu_partitioner,
3738
get_mps_partitioner,
3839
get_openvino_partitioner,
3940
get_qnn_partitioner,
@@ -43,6 +44,7 @@
4344
)
4445
from executorch.extension.llm.export.quantizer_lib import (
4546
get_coreml_quantizer,
47+
get_ethosu_quantizer,
4648
get_ov_quantizer,
4749
get_pt2e_quantization_params,
4850
get_pt2e_quantizers,
@@ -218,6 +220,7 @@ def build_args_parser() -> argparse.ArgumentParser:
218220
"coreml_baseline_8a_c4w",
219221
"vulkan_8w",
220222
"tosa_8a8w",
223+
"ethosu_8a8w",
221224
],
222225
help="Use PT2E quantization. Comma separated options. e.g. xnnpack_dynamic (for per channel 8 bit weight), xnnpack_dynamic_qc4 (for per channel 4 bit weight), embedding.",
223226
)
@@ -813,6 +816,14 @@ def get_quantizer_and_quant_params(llm_config):
813816
llm_config.backend.tosa.version, llm_config.quantization.pt2e_quantize.value
814817
)
815818
quantizers.append(tosa_quantizer)
819+
if llm_config.backend.ethosu.enabled and llm_config.quantization.pt2e_quantize:
820+
ethosu_quantizer = get_ethosu_quantizer(
821+
llm_config.backend.ethosu.target,
822+
llm_config.backend.ethosu.system_config,
823+
llm_config.backend.ethosu.memory_mode,
824+
llm_config.quantization.pt2e_quantize.value,
825+
)
826+
quantizers.append(ethosu_quantizer)
816827
if llm_config.backend.vulkan.enabled and llm_config.quantization.pt2e_quantize:
817828
assert (
818829
len(quantizers) == 0
@@ -984,20 +995,27 @@ def _to_edge_and_lower_llama_openvino(
984995
return builder.to_executorch(passes=additional_passes)
985996

986997

987-
def _to_edge_and_lower_llama_tosa(
998+
def _to_edge_and_lower_llama_arm(
988999
builder_exported,
9891000
modelname,
9901001
quantizers,
9911002
additional_passes,
992-
tosa_spec,
1003+
llm_config: LlmConfig,
9931004
verbose: bool = False,
9941005
) -> LLMEdgeManager:
9951006
logging.info("Lowering model using TOSA partitioner")
9961007

9971008
partitioners = []
998-
partitioners.append(get_tosa_partitioner(tosa_spec))
999-
1000-
modelname = f"tosa_{modelname}"
1009+
if llm_config.backend.ethosu.enabled:
1010+
partitioners.append(
1011+
get_ethosu_partitioner(
1012+
llm_config.backend.ethosu.target,
1013+
)
1014+
)
1015+
modelname = f"ethosu_{modelname}"
1016+
elif llm_config.backend.tosa.enabled:
1017+
partitioners.append(get_tosa_partitioner(llm_config.backend.tosa.version))
1018+
modelname = f"tosa_{modelname}"
10011019

10021020
builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower(
10031021
partitioners
@@ -1365,13 +1383,13 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
13651383
openvino_device=llm_config.backend.openvino.device,
13661384
verbose=llm_config.debug.verbose,
13671385
)
1368-
elif llm_config.backend.tosa.enabled:
1369-
builder = _to_edge_and_lower_llama_tosa(
1386+
elif llm_config.backend.tosa.enabled or llm_config.backend.ethosu.enabled:
1387+
builder = _to_edge_and_lower_llama_arm(
13701388
builder_exported,
13711389
modelname,
13721390
quantizers,
13731391
additional_passes,
1374-
llm_config.backend.tosa.version,
1392+
llm_config,
13751393
verbose=llm_config.debug.verbose,
13761394
)
13771395
else:

examples/models/llama/tests/test_export_llama_lib.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,17 @@
1010
from executorch.devtools.backend_debug import get_delegation_info
1111

1212
# The Arm backend is optional. Record whether it could be imported, and bind
# the quantizer names to None when it could not, so the names always exist at
# module level.
try:
    from executorch.backends.arm.quantizer.arm_quantizer import (
        EthosUQuantizer,
        TOSAQuantizer,
    )
except ImportError:
    HAS_ARM_BACKEND = False
    EthosUQuantizer = None
    TOSAQuantizer = None
else:
    HAS_ARM_BACKEND = True
23+
1924
from executorch.examples.models.llama.export_llama_lib import (
2025
_export_llama,
2126
build_args_parser,
@@ -73,3 +78,18 @@ def test_get_quantizer_and_quant_params_returns_tosa_quantizer(self):
7378
self.assertIsNone(quant_dtype)
7479
self.assertEqual(len(quantizers), 1)
7580
self.assertIsInstance(quantizers[0], TOSAQuantizer)
81+
82+
@unittest.skipUnless(HAS_ARM_BACKEND, "ARM backend not available")
def test_get_quantizer_and_quant_params_returns_ethosu_quantizer(self):
    """Enabling Ethos-U with ``ethosu_8a8w`` yields exactly one EthosUQuantizer."""
    config = LlmConfig()
    config.backend.ethosu.enabled = True
    config.quantization.pt2e_quantize = Pt2eQuantize.ethosu_8a8w

    result = get_quantizer_and_quant_params(config)
    quant_params, quantizer_list, dtype = result

    # Only the quantizer list is expected to be populated for this setup.
    self.assertIsNone(quant_params)
    self.assertIsNone(dtype)
    self.assertEqual(len(quantizer_list), 1)
    self.assertIsInstance(quantizer_list[0], EthosUQuantizer)

extension/llm/export/config/llm_config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ class Pt2eQuantize(str, Enum):
348348
coreml_baseline_8a_c4w = "coreml_baseline_8a_c4w"
349349
vulkan_8w = "vulkan_8w"
350350
tosa_8a8w = "tosa_8a8w"
351+
ethosu_8a8w = "ethosu_8a8w"
351352

352353

353354
class SpinQuant(str, Enum):
@@ -545,6 +546,18 @@ class TosaConfig:
545546
version: str = "TOSA-1.0+INT"
546547

547548

549+
@dataclass
class EthosUConfig:
    """
    Settings for the Ethos-U backend.

    Every field has a default, so ``EthosUConfig()`` describes a disabled
    backend with the default target.
    """

    # Turns the Ethos-U backend on; off unless explicitly enabled.
    enabled: bool = False
    # Target identifier; override to select a different Ethos-U variant.
    target: str = "ethos-u85-128"
    # NOTE(review): memory_mode and system_config are presumably forwarded to
    # the Ethos-U compile spec -- confirm against the callers.
    memory_mode: str = "default"
    system_config: str = "default"
559+
560+
548561
@dataclass
549562
class BackendConfig:
550563
"""
@@ -560,6 +573,7 @@ class BackendConfig:
560573
openvino: OpenvinoConfig = field(default_factory=OpenvinoConfig)
561574
torchao: TorchAOKernelsConfig = field(default_factory=TorchAOKernelsConfig)
562575
tosa: TosaConfig = field(default_factory=TosaConfig)
576+
ethosu: EthosUConfig = field(default_factory=EthosUConfig)
563577

564578

565579
################################################################################

extension/llm/export/partitioner_lib.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,3 +246,12 @@ def get_tosa_partitioner(version: str):
246246
compile_spec = TosaCompileSpec(version)
247247

248248
return TOSAPartitioner(compile_spec)
249+
250+
251+
def get_ethosu_partitioner(target: str):
    """
    Build an ``EthosUPartitioner`` for the given Ethos-U target.

    The Arm backend modules are imported inside the function, so they are
    only loaded when this helper is called.

    Args:
        target: Target string, passed as the sole argument to
            ``EthosUCompileSpec``.

    Returns:
        An ``EthosUPartitioner`` constructed from that compile spec.
    """
    from executorch.backends.arm.ethosu.compile_spec import EthosUCompileSpec
    from executorch.backends.arm.ethosu.partitioner import EthosUPartitioner

    return EthosUPartitioner(EthosUCompileSpec(target))

extension/llm/export/quantizer_lib.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,3 +340,24 @@ def get_tosa_quantizer(version: str, pt2e_quantize: str):
340340
raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")
341341

342342
return quantizer
343+
344+
345+
def get_ethosu_quantizer(
    target: str, system_config: str, memory_mode: str, pt2e_quantize: str
):
    """
    Build an ``EthosUQuantizer`` for the requested PT2E quantization scheme.

    Args:
        target: First positional argument given to ``EthosUCompileSpec``.
        system_config: Second positional argument given to
            ``EthosUCompileSpec``.
        memory_mode: Third positional argument given to ``EthosUCompileSpec``.
        pt2e_quantize: Name of the quantization scheme. Only
            ``"ethosu_8a8w"`` is supported.

    Returns:
        An ``EthosUQuantizer`` whose global configuration is set to the
        result of ``get_symmetric_quantization_config()``.

    Raises:
        ValueError: If ``pt2e_quantize`` is not a supported scheme. This is
            checked before the Arm backend is imported or any object is
            constructed.
    """
    # Reject unsupported schemes up front, so a bad spec is reported as such
    # instead of after (or hidden behind) backend import and construction.
    if pt2e_quantize != "ethosu_8a8w":
        raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")

    from executorch.backends.arm.ethosu.compile_spec import EthosUCompileSpec
    from executorch.backends.arm.quantizer.arm_quantizer import (
        EthosUQuantizer,
        get_symmetric_quantization_config,
    )

    compile_spec = EthosUCompileSpec(target, system_config, memory_mode)

    quantizer = EthosUQuantizer(compile_spec)
    quantizer.set_global(get_symmetric_quantization_config())

    return quantizer

0 commit comments

Comments
 (0)