From 7838f111606a953a79146b6bd8a5c54284985bcb Mon Sep 17 00:00:00 2001 From: Cheng-Hsin Weng Date: Mon, 9 Mar 2026 16:56:21 +0800 Subject: [PATCH 1/3] Qualcomm AI Engine Direct - Add profile option in CLI tool to measure performance --- backends/qualcomm/export_utils.py | 8 ++- backends/qualcomm/tests/test_qnn_delegate.py | 72 ++++++++++++++++++++ examples/qualcomm/util_scripts/cli.py | 48 ++++++++++++- 3 files changed, 123 insertions(+), 5 deletions(-) diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py index 2c7ab2abd02..4932f8858e8 100644 --- a/backends/qualcomm/export_utils.py +++ b/backends/qualcomm/export_utils.py @@ -352,9 +352,11 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None): ) output_callback(result) else: - subprocess.run( + result = subprocess.run( cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout ) + if result.returncode != 0: + raise RuntimeError(f"adb command failed: {cmds}") def push( # noqa: C901 self, @@ -417,6 +419,7 @@ def execute( custom_runner_cmd=None, method_index=0, output_callback: Optional[Callable[[str], None]] = None, + iteration=1, ): self._adb(["shell", f"mkdir -p {self.output_folder}"]) # run the delegation @@ -436,6 +439,7 @@ def execute( else "" ), f"--method_index {method_index}", + f"--iteration {iteration}", ] ) + self.extra_cmds @@ -687,7 +691,7 @@ def setup_common_args_and_variables(): "-H", "--host", help="hostname where android device is connected.", - default=None, + default="localhost", type=str, ) diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 6c8593eb755..6d2d80b7338 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -9204,6 +9204,78 @@ def test_cli_with_input_list_assignment(self): golden_output = ep.module()(sample_input, sample_input2) self._assert_outputs_equal(golden_output, device_output) + def 
test_cli_execute_with_profile(self): + with tempfile.TemporaryDirectory() as tmp_dir: + sample_input = torch.randn(1, 2, 3, 4) + sample_input2 = torch.randn(1, 2, 3, 4) + ep = torch.export.export( + Sub_y_x_from_x_y(), (sample_input, sample_input2) # noqa: F405 + ) + torch.export.save(ep, f"{tmp_dir}/sub.pt2") + torch.save(sample_input, f"{tmp_dir}/input_0_0.pt") + torch.save(sample_input2, f"{tmp_dir}/input_0_1.pt") + with open(f"{tmp_dir}/input_list", "w") as f: + f.write(f"x:={tmp_dir}/input_0_0.pt y:={tmp_dir}/input_0_1.pt\n") + + # quantize + cmds = [ + "python", + "-m", + "executorch.examples.qualcomm.util_scripts.cli", + "quantize", + "--artifact", + f"{tmp_dir}/sub.pt2", + "--output_folder", + f"{tmp_dir}/q_out", + "--input_list", + f"{tmp_dir}/input_list", + "--soc_model", + self.soc_model, + ] + subprocess.run(cmds, stdout=subprocess.DEVNULL) + self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2")) + # compile + cmds = [ + "python", + "-m", + "executorch.examples.qualcomm.util_scripts.cli", + "compile", + "--artifact", + f"{tmp_dir}/q_out/sub_quantized.pt2", + "--output_folder", + f"{tmp_dir}/c_out", + "--soc_model", + self.soc_model, + ] + subprocess.run(cmds, stdout=subprocess.DEVNULL) + self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte")) + self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.svg")) + # execute + cmds = [ + "python", + "-m", + "executorch.examples.qualcomm.util_scripts.cli", + "execute", + "--artifact", + f"{tmp_dir}/c_out/sub_quantized.pte", + "--output_folder", + f"{tmp_dir}/e_out", + "--soc_model", + self.soc_model, + "--target", + self.target, + "--device", + self.device, + "--build_folder", + self.build_folder, + "--input_list", + f"{tmp_dir}/input_list", + "--profile", + ] + subprocess.run(cmds, stdout=subprocess.DEVNULL) + performance_file = f"{tmp_dir}/e_out/performance.json" + self.assertTrue(os.path.isfile(performance_file)) + def test_custom_op_1(self): if not 
self.required_envs([self.op_package_dir]): self.skipTest("missing required envs") diff --git a/examples/qualcomm/util_scripts/cli.py b/examples/qualcomm/util_scripts/cli.py index 02af78e3dd4..4046b172254 100644 --- a/examples/qualcomm/util_scripts/cli.py +++ b/examples/qualcomm/util_scripts/cli.py @@ -8,7 +8,9 @@ # and executing models under various configuration flags. import argparse +import csv import importlib +import json import logging import os import re @@ -48,6 +50,7 @@ QNN_TENSOR_TYPE_MAP, to_edge_transform_and_lower_to_qnn, ) +from executorch.devtools import Inspector from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary from executorch.exir import ExecutorchBackendConfig from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass @@ -300,7 +303,13 @@ def execute(args): args.artifact, verification=Verification.Minimal, ) - input_order_func = program.load_method(INPUT_ORDER) + try: + input_order_func = program.load_method(INPUT_ORDER) + except: + logger.error( + "Missing INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order." 
+ ) + exit(1) input_order = input_order_func.execute([]) # load input files @@ -317,6 +326,8 @@ def execute(args): user_inputs.append(ordered_inputs) else: user_inputs.append(inputs) + if args.profile: + break logger.info("retrieving graph I/O") # setup compiler spec @@ -358,7 +369,8 @@ def execute(args): adb.push(inputs=user_inputs, backends=[backend_type]) logger.info("starting inference") - adb.execute() + iteration = 100 if args.profile else 1 + adb.execute(iteration=iteration) tmp_dir = f"{args.output_folder}/tmp_outputs" os.makedirs(tmp_dir, exist_ok=True) @@ -404,8 +416,29 @@ def post_process(): ) torch.save(output, f"{output_result_folder}/output_{output_index}.pt") + def post_process_etdump(): + etdump_path = f"{args.output_folder}/etdump.etdp" + csv_path = f"{args.output_folder}/etdump.csv" + json_path = f"{args.output_folder}/performance.json" + inspector = Inspector(etdump_path=etdump_path) + inspector.save_data_to_tsv(csv_path) + # Create a list to hold the data + data = [] + # Open the CSV file and read its contents + with open(csv_path, encoding="utf-8") as csv_file: + csv_reader = csv.DictReader(csv_file, delimiter="\t") + # Convert each row into a dictionary and add it to the list + for row in csv_reader: + data.append(row) + # Write the data to a JSON file + with open(json_path, "w", encoding="utf-8") as json_file: + json.dump(data, json_file, indent=4) + logger.info("collecting output data") - adb.pull(host_output_path=tmp_dir, callback=post_process) + if args.profile: + adb.pull_etdump(args.output_folder, callback=post_process_etdump) + else: + adb.pull(host_output_path=tmp_dir, callback=post_process) shutil.rmtree(tmp_dir) logger.info(f"execution finished, please check {args.output_folder} for results") @@ -633,6 +666,15 @@ def main(): default="htp", help="Backend to be deployed ('htp'/'lpai' are currently supported).", ) + sub_execute.add_argument( + "--profile", + help=( + "When enabled, only the first entry in input_list.txt is used for " + 
"inference. The total number of inferences is fixed at 100. In " + "this case, the outputs folder will not be pulled." + ), + action="store_true", + ) sub_execute.set_defaults(callback=execute) args = parser.parse_args() From e472e4ee00dd07170c8861d06e407b785e93ad1f Mon Sep 17 00:00:00 2001 From: qti-chenweng <168707118+chenweng-quic@users.noreply.github.com> Date: Wed, 29 Apr 2026 17:22:56 +0800 Subject: [PATCH 2/3] Omit --iteration flag when direct_build_folder is set --- backends/qualcomm/export_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py index 4932f8858e8..18d1db6e254 100644 --- a/backends/qualcomm/export_utils.py +++ b/backends/qualcomm/export_utils.py @@ -439,7 +439,7 @@ def execute( else "" ), f"--method_index {method_index}", - f"--iteration {iteration}", + "" if self.direct_build_folder else f"--iteration {iteration}", ] ) + self.extra_cmds From 4d8d92933cb118d4c69b6681de9e81ecb046d2bb Mon Sep 17 00:00:00 2001 From: Cheng-Hsin Weng Date: Wed, 29 Apr 2026 23:14:24 +0800 Subject: [PATCH 3/3] Address review comments: restore --host default, narrow bare except, simplify TSV-to-JSON conversion --- backends/qualcomm/export_utils.py | 2 +- examples/qualcomm/util_scripts/cli.py | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py index 18d1db6e254..a676cc327ab 100644 --- a/backends/qualcomm/export_utils.py +++ b/backends/qualcomm/export_utils.py @@ -691,7 +691,7 @@ def setup_common_args_and_variables(): "-H", "--host", help="hostname where android device is connected.", - default="localhost", + default=None, type=str, ) diff --git a/examples/qualcomm/util_scripts/cli.py b/examples/qualcomm/util_scripts/cli.py index 4046b172254..78613c3f62a 100644 --- a/examples/qualcomm/util_scripts/cli.py +++ b/examples/qualcomm/util_scripts/cli.py @@ -305,7 +305,7 @@ def execute(args): ) try: input_order_func = program.load_method(INPUT_ORDER) - except: + except Exception: logger.error( "Missing
INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order." ) @@ -422,15 +422,8 @@ def post_process_etdump(): json_path = f"{args.output_folder}/performance.json" inspector = Inspector(etdump_path=etdump_path) inspector.save_data_to_tsv(csv_path) - # Create a list to hold the data - data = [] - # Open the CSV file and read its contents with open(csv_path, encoding="utf-8") as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter="\t") - # Convert each row into a dictionary and add it to the list - for row in csv_reader: - data.append(row) - # Write the data to a JSON file + data = list(csv.DictReader(csv_file, delimiter="\t")) with open(json_path, "w", encoding="utf-8") as json_file: json.dump(data, json_file, indent=4)