Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion backends/qualcomm/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,11 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None):
)
output_callback(result)
else:
subprocess.run(
result = subprocess.run(
cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout
)
if result.returncode != 0:
raise RuntimeError(f"adb command failed: {cmds}")

def push( # noqa: C901
self,
Expand Down Expand Up @@ -417,6 +419,7 @@ def execute(
custom_runner_cmd=None,
method_index=0,
output_callback: Optional[Callable[[str], None]] = None,
iteration=1,
):
self._adb(["shell", f"mkdir -p {self.output_folder}"])
# run the delegation
Expand All @@ -436,6 +439,7 @@ def execute(
else ""
),
f"--method_index {method_index}",
"" if self.direct_build_folder else f"--iteration {iteration}",
]
)
+ self.extra_cmds
Expand Down
72 changes: 72 additions & 0 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9204,6 +9204,78 @@ def test_cli_with_input_list_assignment(self):
golden_output = ep.module()(sample_input, sample_input2)
self._assert_outputs_equal(golden_output, device_output)

def test_cli_execute_with_profile(self):
    """End-to-end smoke test of the QNN CLI quantize -> compile -> execute
    pipeline with on-device profiling enabled.

    Exports a small two-input subtraction module, drives the three CLI
    subcommands via subprocess, and asserts that each stage produces its
    expected artifact, ending with the profiling summary
    (performance.json) that the execute stage emits under ``--profile``.

    Assumes ``self.soc_model``/``self.target``/``self.device``/
    ``self.build_folder`` were populated by the test harness from env
    flags (same convention as the sibling CLI tests).
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        sample_input = torch.randn(1, 2, 3, 4)
        sample_input2 = torch.randn(1, 2, 3, 4)
        ep = torch.export.export(
            Sub_y_x_from_x_y(), (sample_input, sample_input2)  # noqa: F405
        )
        torch.export.save(ep, f"{tmp_dir}/sub.pt2")
        torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
        torch.save(sample_input2, f"{tmp_dir}/input_0_1.pt")
        with open(f"{tmp_dir}/input_list", "w") as f:
            f.write(f"x:={tmp_dir}/input_0_0.pt y:={tmp_dir}/input_0_1.pt\n")

        # quantize
        cmds = [
            "python",
            "-m",
            "executorch.examples.qualcomm.util_scripts.cli",
            "quantize",
            "--artifact",
            f"{tmp_dir}/sub.pt2",
            "--output_folder",
            f"{tmp_dir}/q_out",
            "--input_list",
            f"{tmp_dir}/input_list",
            "--soc_model",
            self.soc_model,
        ]
        # check=True: fail fast with the failing command in the error
        # instead of a less informative missing-file assertion below.
        subprocess.run(cmds, stdout=subprocess.DEVNULL, check=True)
        self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2"))

        # compile
        cmds = [
            "python",
            "-m",
            "executorch.examples.qualcomm.util_scripts.cli",
            "compile",
            "--artifact",
            f"{tmp_dir}/q_out/sub_quantized.pt2",
            "--output_folder",
            f"{tmp_dir}/c_out",
            "--soc_model",
            self.soc_model,
        ]
        subprocess.run(cmds, stdout=subprocess.DEVNULL, check=True)
        self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte"))
        self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.svg"))

        # execute with profiling enabled (runs on the attached device)
        cmds = [
            "python",
            "-m",
            "executorch.examples.qualcomm.util_scripts.cli",
            "execute",
            "--artifact",
            f"{tmp_dir}/c_out/sub_quantized.pte",
            "--output_folder",
            f"{tmp_dir}/e_out",
            "--soc_model",
            self.soc_model,
            "--target",
            self.target,
            "--device",
            self.device,
            "--build_folder",
            self.build_folder,
            "--input_list",
            f"{tmp_dir}/input_list",
            "--profile",
        ]
        subprocess.run(cmds, stdout=subprocess.DEVNULL, check=True)
        # --profile makes the CLI emit a performance.json summary instead
        # of pulling output tensors.
        performance_file = f"{tmp_dir}/e_out/performance.json"
        self.assertTrue(os.path.isfile(performance_file))

def test_custom_op_1(self):
if not self.required_envs([self.op_package_dir]):
self.skipTest("missing required envs")
Expand Down
41 changes: 38 additions & 3 deletions examples/qualcomm/util_scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
# and executing models under various configuration flags.

import argparse
import csv
import importlib
import json
import logging
import os
import re
Expand Down Expand Up @@ -48,6 +50,7 @@
QNN_TENSOR_TYPE_MAP,
to_edge_transform_and_lower_to_qnn,
)
from executorch.devtools import Inspector
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
from executorch.exir import ExecutorchBackendConfig
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
Expand Down Expand Up @@ -300,7 +303,13 @@ def execute(args):
args.artifact,
verification=Verification.Minimal,
)
input_order_func = program.load_method(INPUT_ORDER)
try:
input_order_func = program.load_method(INPUT_ORDER)
except Exception:
logger.error(
"Missing INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order."
)
exit(1)
input_order = input_order_func.execute([])

# load input files
Expand All @@ -317,6 +326,8 @@ def execute(args):
user_inputs.append(ordered_inputs)
else:
user_inputs.append(inputs)
if args.profile:
break

logger.info("retrieving graph I/O")
# setup compiler spec
Expand Down Expand Up @@ -358,7 +369,8 @@ def execute(args):
adb.push(inputs=user_inputs, backends=[backend_type])

logger.info("starting inference")
adb.execute()
iteration = 100 if args.profile else 1
adb.execute(iteration=iteration)

tmp_dir = f"{args.output_folder}/tmp_outputs"
os.makedirs(tmp_dir, exist_ok=True)
Expand Down Expand Up @@ -404,8 +416,22 @@ def post_process():
)
torch.save(output, f"{output_result_folder}/output_{output_index}.pt")

def post_process_etdump():
    # Convert the ETDump pulled from the device into a tab-separated table
    # via the devtools Inspector, then re-serialize those rows as
    # pretty-printed JSON so the profiling summary is easy to consume.
    # NOTE(review): assumes etdump.etdp already exists under
    # args.output_folder (pulled by the caller) — confirm against adb.pull_etdump.
    out_dir = args.output_folder
    tsv_file = f"{out_dir}/etdump.csv"
    Inspector(etdump_path=f"{out_dir}/etdump.etdp").save_data_to_tsv(tsv_file)
    with open(tsv_file, encoding="utf-8") as fin:
        rows = [row for row in csv.DictReader(fin, delimiter="\t")]
    with open(f"{out_dir}/performance.json", "w", encoding="utf-8") as fout:
        json.dump(rows, fout, indent=4)

logger.info("collecting output data")
adb.pull(host_output_path=tmp_dir, callback=post_process)
if args.profile:
adb.pull_etdump(args.output_folder, callback=post_process_etdump)
else:
adb.pull(host_output_path=tmp_dir, callback=post_process)
shutil.rmtree(tmp_dir)
logger.info(f"execution finished, please check {args.output_folder} for results")

Expand Down Expand Up @@ -633,6 +659,15 @@ def main():
default="htp",
help="Backend to be deployed ('htp'/'lpai' are currently supported).",
)
sub_execute.add_argument(
"--profile",
help=(
"When enabled, only the first entry in input_list.txt is used for "
"inference. The total number of inferences is fixed at 100. In "
"this case, the outputs folder will not be pulled."
),
action="store_true",
)
sub_execute.set_defaults(callback=execute)

args = parser.parse_args()
Expand Down
Loading