|
39 | 39 | get_openvino_partitioner, |
40 | 40 | get_qnn_partitioner, |
41 | 41 | get_tosa_partitioner, |
| 42 | + get_vgf_partitioner, |
42 | 43 | get_vulkan_partitioner, |
43 | 44 | get_xnnpack_partitioner, |
44 | 45 | ) |
|
50 | 51 | get_pt2e_quantizers, |
51 | 52 | get_qnn_quantizer, |
52 | 53 | get_tosa_quantizer, |
| 54 | + get_vgf_quantizer, |
53 | 55 | get_vulkan_quantizer, |
54 | 56 | ) |
55 | 57 | from executorch.util.activation_memory_profiler import generate_memory_trace |
@@ -824,6 +826,13 @@ def get_quantizer_and_quant_params(llm_config): |
824 | 826 | llm_config.quantization.pt2e_quantize.value, |
825 | 827 | ) |
826 | 828 | quantizers.append(ethosu_quantizer) |
| 829 | + if llm_config.backend.vgf.enabled and llm_config.quantization.pt2e_quantize: |
| 830 | + vgf_quantizer = get_vgf_quantizer( |
| 831 | + llm_config.backend.vgf.compile_spec, |
| 832 | + llm_config.backend.vgf.compiler_flags, |
| 833 | + llm_config.quantization.pt2e_quantize.value, |
| 834 | + ) |
| 835 | + quantizers.append(vgf_quantizer) |
827 | 836 | if llm_config.backend.vulkan.enabled and llm_config.quantization.pt2e_quantize: |
828 | 837 | assert ( |
829 | 838 | len(quantizers) == 0 |
@@ -1013,6 +1022,14 @@ def _to_edge_and_lower_llama_arm( |
1013 | 1022 | ) |
1014 | 1023 | ) |
1015 | 1024 | modelname = f"ethosu_{modelname}" |
| 1025 | + elif llm_config.backend.vgf.enabled: |
| 1026 | + partitioners.append( |
| 1027 | + get_vgf_partitioner( |
| 1028 | + llm_config.backend.vgf.compile_spec, |
| 1029 | + llm_config.backend.vgf.compiler_flags, |
| 1030 | + ) |
| 1031 | + ) |
| 1032 | + modelname = f"vgf_{modelname}" |
1016 | 1033 | elif llm_config.backend.tosa.enabled: |
1017 | 1034 | partitioners.append(get_tosa_partitioner(llm_config.backend.tosa.version)) |
1018 | 1035 | modelname = f"tosa_{modelname}" |
@@ -1336,7 +1353,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901 |
1336 | 1353 |
|
1337 | 1354 | # export_to_edge |
1338 | 1355 | builder_manager = _prepare_for_llama_export(llm_config) |
1339 | | - if llm_config.backend.tosa.enabled: |
| 1356 | + if ( |
| 1357 | + llm_config.backend.tosa.enabled |
| 1358 | + or llm_config.backend.vgf.enabled |
| 1359 | + or llm_config.backend.ethosu.enabled |
| 1360 | + ): |
1340 | 1361 | builder_manager.skip_dim_order = False |
1341 | 1362 | builder_exported = builder_manager.export() |
1342 | 1363 | builder_exported.run_canonical_optimizations() |
@@ -1383,7 +1404,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901 |
1383 | 1404 | openvino_device=llm_config.backend.openvino.device, |
1384 | 1405 | verbose=llm_config.debug.verbose, |
1385 | 1406 | ) |
1386 | | - elif llm_config.backend.tosa.enabled or llm_config.backend.ethosu.enabled: |
| 1407 | + elif ( |
| 1408 | + llm_config.backend.tosa.enabled |
| 1409 | + or llm_config.backend.ethosu.enabled |
| 1410 | + or llm_config.backend.vgf.enabled |
| 1411 | + ): |
1387 | 1412 | builder = _to_edge_and_lower_llama_arm( |
1388 | 1413 | builder_exported, |
1389 | 1414 | modelname, |
|
0 commit comments