From 7838f111606a953a79146b6bd8a5c54284985bcb Mon Sep 17 00:00:00 2001
From: Cheng-Hsin Weng <chenweng@qti.qualcomm.com>
Date: Mon, 9 Mar 2026 16:56:21 +0800
Subject: [PATCH 1/3] Qualcomm AI Engine Direct - Add profile option in CLI
 tool to measure performance

---
 backends/qualcomm/export_utils.py            |  8 ++-
 backends/qualcomm/tests/test_qnn_delegate.py | 72 ++++++++++++++++++++
 examples/qualcomm/util_scripts/cli.py        | 48 ++++++++++++-
 3 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py
index 2c7ab2abd02..4932f8858e8 100644
--- a/backends/qualcomm/export_utils.py
+++ b/backends/qualcomm/export_utils.py
@@ -352,9 +352,11 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None):
             )
             output_callback(result)
         else:
-            subprocess.run(
+            result = subprocess.run(
                 cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout
             )
+        if result.returncode != 0:
+            raise RuntimeError(f"adb command failed: {cmds}")
 
     def push(  # noqa: C901
         self,
@@ -417,6 +419,7 @@ def execute(
         custom_runner_cmd=None,
         method_index=0,
         output_callback: Optional[Callable[[str], None]] = None,
+        iteration=1,
     ):
         self._adb(["shell", f"mkdir -p {self.output_folder}"])
         # run the delegation
@@ -436,6 +439,7 @@ def execute(
                             else ""
                         ),
                         f"--method_index {method_index}",
+                        f"--iteration {iteration}",
                     ]
                 )
                 + self.extra_cmds
@@ -687,7 +691,7 @@ def setup_common_args_and_variables():
         "-H",
         "--host",
         help="hostname where android device is connected.",
-        default=None,
+        default="localhost",
         type=str,
     )
 
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 6c8593eb755..6d2d80b7338 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -9204,6 +9204,78 @@ def test_cli_with_input_list_assignment(self):
             golden_output = ep.module()(sample_input, sample_input2)
             self._assert_outputs_equal(golden_output, device_output)
 
+    def test_cli_execute_with_profile(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            sample_input = torch.randn(1, 2, 3, 4)
+            sample_input2 = torch.randn(1, 2, 3, 4)
+            ep = torch.export.export(
+                Sub_y_x_from_x_y(), (sample_input, sample_input2)  # noqa: F405
+            )
+            torch.export.save(ep, f"{tmp_dir}/sub.pt2")
+            torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
+            torch.save(sample_input2, f"{tmp_dir}/input_0_1.pt")
+            with open(f"{tmp_dir}/input_list", "w") as f:
+                f.write(f"x:={tmp_dir}/input_0_0.pt y:={tmp_dir}/input_0_1.pt\n")
+
+            # quantize the exported program with the CLI "quantize" subcommand
+            cmds = [
+                "python",
+                "-m",
+                "executorch.examples.qualcomm.util_scripts.cli",
+                "quantize",
+                "--artifact",
+                f"{tmp_dir}/sub.pt2",
+                "--output_folder",
+                f"{tmp_dir}/q_out",
+                "--input_list",
+                f"{tmp_dir}/input_list",
+                "--soc_model",
+                self.soc_model,
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2"))
+            # compile the quantized artifact; a .pte and an .svg are expected
+            cmds = [
+                "python",
+                "-m",
+                "executorch.examples.qualcomm.util_scripts.cli",
+                "compile",
+                "--artifact",
+                f"{tmp_dir}/q_out/sub_quantized.pt2",
+                "--output_folder",
+                f"{tmp_dir}/c_out",
+                "--soc_model",
+                self.soc_model,
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte"))
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.svg"))
+            # run the compiled .pte with --profile; performance.json is expected
+            cmds = [
+                "python",
+                "-m",
+                "executorch.examples.qualcomm.util_scripts.cli",
+                "execute",
+                "--artifact",
+                f"{tmp_dir}/c_out/sub_quantized.pte",
+                "--output_folder",
+                f"{tmp_dir}/e_out",
+                "--soc_model",
+                self.soc_model,
+                "--target",
+                self.target,
+                "--device",
+                self.device,
+                "--build_folder",
+                self.build_folder,
+                "--input_list",
+                f"{tmp_dir}/input_list",
+                "--profile",
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            performance_file = f"{tmp_dir}/e_out/performance.json"
+            self.assertTrue(os.path.isfile(performance_file))
+
     def test_custom_op_1(self):
         if not self.required_envs([self.op_package_dir]):
             self.skipTest("missing required envs")
diff --git a/examples/qualcomm/util_scripts/cli.py b/examples/qualcomm/util_scripts/cli.py
index 02af78e3dd4..4046b172254 100644
--- a/examples/qualcomm/util_scripts/cli.py
+++ b/examples/qualcomm/util_scripts/cli.py
@@ -8,7 +8,9 @@
 # and executing models under various configuration flags.
 
 import argparse
+import csv
 import importlib
+import json
 import logging
 import os
 import re
@@ -48,6 +50,7 @@
     QNN_TENSOR_TYPE_MAP,
     to_edge_transform_and_lower_to_qnn,
 )
+from executorch.devtools import Inspector
 from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
 from executorch.exir import ExecutorchBackendConfig
 from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
@@ -300,7 +303,13 @@ def execute(args):
         args.artifact,
         verification=Verification.Minimal,
     )
-    input_order_func = program.load_method(INPUT_ORDER)
+    try:
+        input_order_func = program.load_method(INPUT_ORDER)
+    except:
+        logger.error(
+            "Missing INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order."
+        )
+        exit(1)
     input_order = input_order_func.execute([])
 
     # load input files
@@ -317,6 +326,8 @@ def execute(args):
             user_inputs.append(ordered_inputs)
         else:
             user_inputs.append(inputs)
+        if args.profile:
+            break
 
     logger.info("retrieving graph I/O")
     # setup compiler spec
@@ -358,7 +369,8 @@ def execute(args):
     adb.push(inputs=user_inputs, backends=[backend_type])
 
     logger.info("starting inference")
-    adb.execute()
+    iteration = 100 if args.profile else 1
+    adb.execute(iteration=iteration)
 
     tmp_dir = f"{args.output_folder}/tmp_outputs"
     os.makedirs(tmp_dir, exist_ok=True)
@@ -404,8 +416,29 @@ def post_process():
             )
             torch.save(output, f"{output_result_folder}/output_{output_index}.pt")
 
+    def post_process_etdump():
+        etdump_path = f"{args.output_folder}/etdump.etdp"
+        csv_path = f"{args.output_folder}/etdump.csv"
+        json_path = f"{args.output_folder}/performance.json"
+        inspector = Inspector(etdump_path=etdump_path)
+        inspector.save_data_to_tsv(csv_path)
+        # Create a list to hold the data
+        data = []
+        # Open the CSV file and read its contents
+        with open(csv_path, encoding="utf-8") as csv_file:
+            csv_reader = csv.DictReader(csv_file, delimiter="\t")
+            # Convert each row into a dictionary and add it to the list
+            for row in csv_reader:
+                data.append(row)
+        # Write the data to a JSON file
+        with open(json_path, "w", encoding="utf-8") as json_file:
+            json.dump(data, json_file, indent=4)
+
     logger.info("collecting output data")
-    adb.pull(host_output_path=tmp_dir, callback=post_process)
+    if args.profile:
+        adb.pull_etdump(args.output_folder, callback=post_process_etdump)
+    else:
+        adb.pull(host_output_path=tmp_dir, callback=post_process)
     shutil.rmtree(tmp_dir)
     logger.info(f"execution finished, please check {args.output_folder} for results")
 
@@ -633,6 +666,15 @@ def main():
         default="htp",
         help="Backend to be deployed ('htp'/'lpai' are currently supported).",
     )
+    sub_execute.add_argument(
+        "--profile",
+        help=(
+            "When enabled, only the first entry in input_list.txt is used for "
+            "inference. The total number of inferences is fixed at 100. In "
+            "this case, the outputs folder will not be pulled."
+        ),
+        action="store_true",
+    )
     sub_execute.set_defaults(callback=execute)
 
     args = parser.parse_args()

From e472e4ee00dd07170c8861d06e407b785e93ad1f Mon Sep 17 00:00:00 2001
From: qti-chenweng <168707118+chenweng-quic@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:22:56 +0800
Subject: [PATCH 2/3] Omit --iteration flag when direct_build_folder is set

---
 backends/qualcomm/export_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py
index 4932f8858e8..18d1db6e254 100644
--- a/backends/qualcomm/export_utils.py
+++ b/backends/qualcomm/export_utils.py
@@ -439,7 +439,7 @@ def execute(
                             else ""
                         ),
                         f"--method_index {method_index}",
-                        f"--iteration {iteration}",
+                        "" if self.direct_build_folder else f"--iteration {iteration}",
                     ]
                 )
                 + self.extra_cmds

From 4d8d92933cb118d4c69b6681de9e81ecb046d2bb Mon Sep 17 00:00:00 2001
From: Cheng-Hsin Weng <chenweng@qti.qualcomm.com>
Date: Wed, 29 Apr 2026 23:14:24 +0800
Subject: [PATCH 3/3] Address Claude review comments: revert --host default,
 narrow bare except, simplify TSV-to-JSON conversion

---
 backends/qualcomm/export_utils.py     |  2 +-
 examples/qualcomm/util_scripts/cli.py | 11 ++---------
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py
index 18d1db6e254..a676cc327ab 100644
--- a/backends/qualcomm/export_utils.py
+++ b/backends/qualcomm/export_utils.py
@@ -691,7 +691,7 @@ def setup_common_args_and_variables():
         "-H",
         "--host",
         help="hostname where android device is connected.",
-        default="localhost",
+        default=None,
         type=str,
     )
 
diff --git a/examples/qualcomm/util_scripts/cli.py b/examples/qualcomm/util_scripts/cli.py
index 4046b172254..78613c3f62a 100644
--- a/examples/qualcomm/util_scripts/cli.py
+++ b/examples/qualcomm/util_scripts/cli.py
@@ -305,7 +305,7 @@ def execute(args):
     )
     try:
         input_order_func = program.load_method(INPUT_ORDER)
-    except:
+    except Exception:
         logger.error(
             "Missing INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order."
         )
@@ -422,15 +422,8 @@ def post_process_etdump():
         json_path = f"{args.output_folder}/performance.json"
         inspector = Inspector(etdump_path=etdump_path)
         inspector.save_data_to_tsv(csv_path)
-        # Create a list to hold the data
-        data = []
-        # Open the CSV file and read its contents
         with open(csv_path, encoding="utf-8") as csv_file:
-            csv_reader = csv.DictReader(csv_file, delimiter="\t")
-            # Convert each row into a dictionary and add it to the list
-            for row in csv_reader:
-                data.append(row)
-        # Write the data to a JSON file
+            data = list(csv.DictReader(csv_file, delimiter="\t"))
         with open(json_path, "w", encoding="utf-8") as json_file:
             json.dump(data, json_file, indent=4)