Skip to content

Commit 22e9347

Browse files
Qualcomm AI Engine Direct - Update CLI tool to dump inference performance (#18085)
1 parent d744d6a commit 22e9347

3 files changed

Lines changed: 115 additions & 4 deletions

File tree

backends/qualcomm/export_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,11 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None):
352352
)
353353
output_callback(result)
354354
else:
355-
subprocess.run(
355+
result = subprocess.run(
356356
cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout
357357
)
358+
if result.returncode != 0:
359+
raise RuntimeError(f"adb command failed: {cmds}")
358360

359361
def push( # noqa: C901
360362
self,
@@ -417,6 +419,7 @@ def execute(
417419
custom_runner_cmd=None,
418420
method_index=0,
419421
output_callback: Optional[Callable[[str], None]] = None,
422+
iteration=1,
420423
):
421424
self._adb(["shell", f"mkdir -p {self.output_folder}"])
422425
# run the delegation
@@ -436,6 +439,7 @@ def execute(
436439
else ""
437440
),
438441
f"--method_index {method_index}",
442+
"" if self.direct_build_folder else f"--iteration {iteration}",
439443
]
440444
)
441445
+ self.extra_cmds

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9204,6 +9204,78 @@ def test_cli_with_input_list_assignment(self):
92049204
golden_output = ep.module()(sample_input, sample_input2)
92059205
self._assert_outputs_equal(golden_output, device_output)
92069206

9207+
def test_cli_execute_with_profile(self):
9208+
with tempfile.TemporaryDirectory() as tmp_dir:
9209+
sample_input = torch.randn(1, 2, 3, 4)
9210+
sample_input2 = torch.randn(1, 2, 3, 4)
9211+
ep = torch.export.export(
9212+
Sub_y_x_from_x_y(), (sample_input, sample_input2) # noqa: F405
9213+
)
9214+
torch.export.save(ep, f"{tmp_dir}/sub.pt2")
9215+
torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
9216+
torch.save(sample_input2, f"{tmp_dir}/input_0_1.pt")
9217+
with open(f"{tmp_dir}/input_list", "w") as f:
9218+
f.write(f"x:={tmp_dir}/input_0_0.pt y:={tmp_dir}/input_0_1.pt\n")
9219+
9220+
# quantize
9221+
cmds = [
9222+
"python",
9223+
"-m",
9224+
"executorch.examples.qualcomm.util_scripts.cli",
9225+
"quantize",
9226+
"--artifact",
9227+
f"{tmp_dir}/sub.pt2",
9228+
"--output_folder",
9229+
f"{tmp_dir}/q_out",
9230+
"--input_list",
9231+
f"{tmp_dir}/input_list",
9232+
"--soc_model",
9233+
self.soc_model,
9234+
]
9235+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
9236+
self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2"))
9237+
# compile
9238+
cmds = [
9239+
"python",
9240+
"-m",
9241+
"executorch.examples.qualcomm.util_scripts.cli",
9242+
"compile",
9243+
"--artifact",
9244+
f"{tmp_dir}/q_out/sub_quantized.pt2",
9245+
"--output_folder",
9246+
f"{tmp_dir}/c_out",
9247+
"--soc_model",
9248+
self.soc_model,
9249+
]
9250+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
9251+
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte"))
9252+
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.svg"))
9253+
# execute
9254+
cmds = [
9255+
"python",
9256+
"-m",
9257+
"executorch.examples.qualcomm.util_scripts.cli",
9258+
"execute",
9259+
"--artifact",
9260+
f"{tmp_dir}/c_out/sub_quantized.pte",
9261+
"--output_folder",
9262+
f"{tmp_dir}/e_out",
9263+
"--soc_model",
9264+
self.soc_model,
9265+
"--target",
9266+
self.target,
9267+
"--device",
9268+
self.device,
9269+
"--build_folder",
9270+
self.build_folder,
9271+
"--input_list",
9272+
f"{tmp_dir}/input_list",
9273+
"--profile",
9274+
]
9275+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
9276+
performance_file = f"{tmp_dir}/e_out/performance.json"
9277+
self.assertTrue(os.path.isfile(performance_file))
9278+
92079279
def test_custom_op_1(self):
92089280
if not self.required_envs([self.op_package_dir]):
92099281
self.skipTest("missing required envs")

examples/qualcomm/util_scripts/cli.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
# and executing models under various configuration flags.
99

1010
import argparse
11+
import csv
1112
import importlib
13+
import json
1214
import logging
1315
import os
1416
import re
@@ -48,6 +50,7 @@
4850
QNN_TENSOR_TYPE_MAP,
4951
to_edge_transform_and_lower_to_qnn,
5052
)
53+
from executorch.devtools import Inspector
5154
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
5255
from executorch.exir import ExecutorchBackendConfig
5356
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
@@ -300,7 +303,13 @@ def execute(args):
300303
args.artifact,
301304
verification=Verification.Minimal,
302305
)
303-
input_order_func = program.load_method(INPUT_ORDER)
306+
try:
307+
input_order_func = program.load_method(INPUT_ORDER)
308+
except Exception:
309+
logger.error(
310+
"Missing INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order."
311+
)
312+
exit(1)
304313
input_order = input_order_func.execute([])
305314

306315
# load input files
@@ -317,6 +326,8 @@ def execute(args):
317326
user_inputs.append(ordered_inputs)
318327
else:
319328
user_inputs.append(inputs)
329+
if args.profile:
330+
break
320331

321332
logger.info("retrieving graph I/O")
322333
# setup compiler spec
@@ -358,7 +369,8 @@ def execute(args):
358369
adb.push(inputs=user_inputs, backends=[backend_type])
359370

360371
logger.info("starting inference")
361-
adb.execute()
372+
iteration = 100 if args.profile else 1
373+
adb.execute(iteration=iteration)
362374

363375
tmp_dir = f"{args.output_folder}/tmp_outputs"
364376
os.makedirs(tmp_dir, exist_ok=True)
@@ -404,8 +416,22 @@ def post_process():
404416
)
405417
torch.save(output, f"{output_result_folder}/output_{output_index}.pt")
406418

419+
def post_process_etdump():
420+
etdump_path = f"{args.output_folder}/etdump.etdp"
421+
csv_path = f"{args.output_folder}/etdump.csv"
422+
json_path = f"{args.output_folder}/performance.json"
423+
inspector = Inspector(etdump_path=etdump_path)
424+
inspector.save_data_to_tsv(csv_path)
425+
with open(csv_path, encoding="utf-8") as csv_file:
426+
data = list(csv.DictReader(csv_file, delimiter="\t"))
427+
with open(json_path, "w", encoding="utf-8") as json_file:
428+
json.dump(data, json_file, indent=4)
429+
407430
logger.info("collecting output data")
408-
adb.pull(host_output_path=tmp_dir, callback=post_process)
431+
if args.profile:
432+
adb.pull_etdump(args.output_folder, callback=post_process_etdump)
433+
else:
434+
adb.pull(host_output_path=tmp_dir, callback=post_process)
409435
shutil.rmtree(tmp_dir)
410436
logger.info(f"execution finished, please check {args.output_folder} for results")
411437

@@ -633,6 +659,15 @@ def main():
633659
default="htp",
634660
help="Backend to be deployed ('htp'/'lpai' are currently supported).",
635661
)
662+
sub_execute.add_argument(
663+
"--profile",
664+
help=(
665+
"When enabled, only the first entry in input_list.txt is used for "
666+
"inference. The total number of inferences is fixed at 100. In "
667+
"this case, the outputs folder will not be pulled."
668+
),
669+
action="store_true",
670+
)
636671
sub_execute.set_defaults(callback=execute)
637672

638673
args = parser.parse_args()

0 commit comments

Comments
 (0)