Skip to content

Commit 1dadd62

Browse files
committed
Qualcomm AI Engine Direct - Add profile option in CLI tool to measure performance
1 parent 8e5ec80 commit 1dadd62

3 files changed

Lines changed: 123 additions & 5 deletions

File tree

backends/qualcomm/export_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,11 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None):
352352
)
353353
output_callback(result)
354354
else:
355-
subprocess.run(
355+
result = subprocess.run(
356356
cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout
357357
)
358+
if result.returncode != 0:
359+
raise RuntimeError(f"adb command failed: {cmds}")
358360

359361
def push( # noqa: C901
360362
self,
@@ -417,6 +419,7 @@ def execute(
417419
custom_runner_cmd=None,
418420
method_index=0,
419421
output_callback: Optional[Callable[[str], None]] = None,
422+
iteration=1,
420423
):
421424
self._adb(["shell", f"mkdir -p {self.output_folder}"])
422425
# run the delegation
@@ -436,6 +439,7 @@ def execute(
436439
else ""
437440
),
438441
f"--method_index {method_index}",
442+
f"--iteration {iteration}",
439443
]
440444
)
441445
+ self.extra_cmds
@@ -687,7 +691,7 @@ def setup_common_args_and_variables():
687691
"-H",
688692
"--host",
689693
help="hostname where android device is connected.",
690-
default=None,
694+
default="localhost",
691695
type=str,
692696
)
693697

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8965,6 +8965,78 @@ def test_cli_with_input_list_assignment(self):
89658965
golden_output = ep.module()(sample_input, sample_input2)
89668966
self._assert_outputs_equal(golden_output, device_output)
89678967

8968+
def test_cli_execute_with_profile(self):
    """Run the CLI quantize -> compile -> execute pipeline with ``--profile``.

    A two-input subtraction model is exported, quantized and compiled through
    the ``executorch.examples.qualcomm.util_scripts.cli`` module, then executed
    with ``--profile``. The test passes when every CLI step exits with status
    0, the expected artifacts exist, and ``performance.json`` is produced in
    the execute output folder and parses as a JSON list.
    """
    # Local import so this test does not depend on the module's import block.
    import json

    def run_cli(subcommand, *cli_args):
        # Fail at the step that actually broke, rather than at a later
        # "file does not exist" assertion.
        cmds = [
            "python",
            "-m",
            "executorch.examples.qualcomm.util_scripts.cli",
            subcommand,
            *cli_args,
        ]
        result = subprocess.run(cmds, stdout=subprocess.DEVNULL)
        self.assertEqual(
            result.returncode, 0, f"cli '{subcommand}' exited with an error"
        )

    with tempfile.TemporaryDirectory() as tmp_dir:
        sample_input = torch.randn(1, 2, 3, 4)
        sample_input2 = torch.randn(1, 2, 3, 4)
        ep = torch.export.export(
            Sub_y_x_from_x_y(), (sample_input, sample_input2)  # noqa: F405
        )
        torch.export.save(ep, f"{tmp_dir}/sub.pt2")
        torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
        torch.save(sample_input2, f"{tmp_dir}/input_0_1.pt")
        with open(f"{tmp_dir}/input_list", "w") as f:
            f.write(f"x:={tmp_dir}/input_0_0.pt y:={tmp_dir}/input_0_1.pt\n")

        # quantize
        run_cli(
            "quantize",
            "--artifact",
            f"{tmp_dir}/sub.pt2",
            "--output_folder",
            f"{tmp_dir}/q_out",
            "--input_list",
            f"{tmp_dir}/input_list",
            "--soc_model",
            self.soc_model,
        )
        self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2"))

        # compile
        run_cli(
            "compile",
            "--artifact",
            f"{tmp_dir}/q_out/sub_quantized.pt2",
            "--output_folder",
            f"{tmp_dir}/c_out",
            "--soc_model",
            self.soc_model,
        )
        self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte"))
        self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.svg"))

        # execute with profiling enabled
        run_cli(
            "execute",
            "--artifact",
            f"{tmp_dir}/c_out/sub_quantized.pte",
            "--output_folder",
            f"{tmp_dir}/e_out",
            "--soc_model",
            self.soc_model,
            "--target",
            self.target,
            "--device",
            self.device,
            "--build_folder",
            self.build_folder,
            "--input_list",
            f"{tmp_dir}/input_list",
            "--profile",
        )
        performance_file = f"{tmp_dir}/e_out/performance.json"
        self.assertTrue(os.path.isfile(performance_file))
        # The CLI dumps a list of row dictionaries; make sure the report is
        # well-formed JSON of that shape, not merely present.
        with open(performance_file, encoding="utf-8") as f:
            self.assertIsInstance(json.load(f), list)
9039+
89689040
def test_custom_op(self):
89699041
if not self.required_envs([self.op_package_dir]):
89709042
self.skipTest("missing required envs")

examples/qualcomm/util_scripts/cli.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
# and executing models under various configuration flags.
99

1010
import argparse
11+
import csv
1112
import importlib
13+
import json
1214
import logging
1315
import os
1416
import re
@@ -48,6 +50,7 @@
4850
QNN_TENSOR_TYPE_MAP,
4951
to_edge_transform_and_lower_to_qnn,
5052
)
53+
from executorch.devtools import Inspector
5154
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
5255
from executorch.exir import ExecutorchBackendConfig
5356
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
@@ -300,7 +303,13 @@ def execute(args):
300303
args.artifact,
301304
verification=Verification.Minimal,
302305
)
303-
input_order_func = program.load_method(INPUT_ORDER)
306+
try:
307+
input_order_func = program.load_method(INPUT_ORDER)
308+
except:
309+
logger.error(
310+
"Missing INPUT_ORDER in the .pte. The CLI execute command only supports .pte files generated by the CLI compile command, which preserves the input order."
311+
)
312+
exit(1)
304313
input_order = input_order_func.execute([])
305314

306315
# load input files
@@ -317,6 +326,8 @@ def execute(args):
317326
user_inputs.append(ordered_inputs)
318327
else:
319328
user_inputs.append(inputs)
329+
if args.profile:
330+
break
320331

321332
logger.info("retrieving graph I/O")
322333
# setup compiler spec
@@ -358,7 +369,8 @@ def execute(args):
358369
adb.push(inputs=user_inputs, backends=[backend_type])
359370

360371
logger.info("starting inference")
361-
adb.execute()
372+
iteration = 100 if args.profile else 1
373+
adb.execute(iteration=iteration)
362374

363375
tmp_dir = f"{args.output_folder}/tmp_outputs"
364376
os.makedirs(tmp_dir, exist_ok=True)
@@ -404,8 +416,29 @@ def post_process():
404416
)
405417
torch.save(output, f"{output_result_folder}/output_{output_index}.pt")
406418

419+
def post_process_etdump():
    """Convert the pulled ETDump into a JSON performance report.

    Reads ``etdump.etdp`` from ``args.output_folder``, asks the Inspector to
    save its data as a tab-separated table, then re-reads that table and
    writes it to ``performance.json`` as a list of per-row dictionaries keyed
    by the table's header names.
    """
    etdump_path = f"{args.output_folder}/etdump.etdp"
    # NOTE: the file keeps a .csv suffix, but its content is tab-separated.
    csv_path = f"{args.output_folder}/etdump.csv"
    json_path = f"{args.output_folder}/performance.json"

    Inspector(etdump_path=etdump_path).save_data_to_tsv(csv_path)

    # newline="" hands newline handling to the csv module, as its
    # documentation requires for file objects passed to a reader.
    with open(csv_path, encoding="utf-8", newline="") as csv_file:
        rows = list(csv.DictReader(csv_file, delimiter="\t"))

    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(rows, json_file, indent=4)
436+
407437
logger.info("collecting output data")
408-
adb.pull(host_output_path=tmp_dir, callback=post_process)
438+
if args.profile:
439+
adb.pull_etdump(args.output_folder, callback=post_process_etdump)
440+
else:
441+
adb.pull(host_output_path=tmp_dir, callback=post_process)
409442
shutil.rmtree(tmp_dir)
410443
logger.info(f"execution finished, please check {args.output_folder} for results")
411444

@@ -633,6 +666,15 @@ def main():
633666
default="htp",
634667
help="Backend to be deployed ('htp'/'lpai' are currently supported).",
635668
)
669+
sub_execute.add_argument(
670+
"--profile",
671+
help=(
672+
"When enabled, only the first entry in input_list.txt is used for "
673+
"inference. The total number of inferences is fixed at 100. In "
674+
"this case, the outputs folder will not be pulled."
675+
),
676+
action="store_true",
677+
)
636678
sub_execute.set_defaults(callback=execute)
637679

638680
args = parser.parse_args()

0 commit comments

Comments
 (0)