Skip to content

Commit 54b4a61

Browse files
committed
Qualcomm AI Engine Direct - Add depth_anything_v2_small to oss_scripts
Summary - Enable the **depth_anything_v2_small** model (https://github.com/DepthAnything/Depth-Anything-V2) on HTP and integrate the script into `oss_scripts/`. - Provide a `--dump_example_output` flag to dump the example image and export depth-estimation images from both the source model and QNN outputs. - Add unit tests to cover the added model. Test plan - Test with random images from ImageNet: `python examples/qualcomm/oss_scripts/depthanything_v2_small.py -a $ARTIFACT -d $IMAGENET_FOLDER_PATH -b build-android/ -H $HOST_NAME -s $DEVICE_ID -m $SOC_ID --seed 1126` - Test with the example image and export the post-processed source model output and QNN output into depth-estimation images: `python examples/qualcomm/oss_scripts/depthanything_v2_small.py -a $ARTIFACT -d $IMAGENET_FOLDER_PATH -b build-android/ -H $HOST_NAME -s $DEVICE_ID -m $SOC_ID --dump_example_output`
1 parent 6a2b7e6 commit 54b4a61

File tree

2 files changed

+263
-0
lines changed

2 files changed

+263
-0
lines changed

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7147,6 +7147,32 @@ def test_deit(self):
71477147
self.assertGreaterEqual(msg["top_1"], 76)
71487148
self.assertGreaterEqual(msg["top_5"], 92)
71497149

def test_depthanything_v2_small(self):
    """Run the depth_anything_v2_small oss script end-to-end and check SQNR."""
    if not self.required_envs([self.image_dataset]):
        self.skipTest("missing required envs")

    script = f"{self.executorch_root}/examples/qualcomm/oss_scripts/depthanything_v2_small.py"
    cmds = [
        "python",
        script,
        "--dataset",
        self.image_dataset,
        "--artifact",
        self.artifact_dir,
        "--build_folder",
        self.build_folder,
    ]
    self.add_default_cmds(cmds)

    proc = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
    with Listener((self.ip, self.port)) as listener:
        conn = listener.accept()
        proc.communicate()
        msg = json.loads(conn.recv())
        if "Error" in msg:
            self.fail(msg["Error"])
        else:
            self.assertGreaterEqual(msg["sqnr"], 15)
71507176
def test_dino_v2(self):
71517177
if not self.required_envs([self.image_dataset]):
71527178
self.skipTest("missing required envs")
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import getpass
8+
import json
9+
import logging
10+
import os
11+
from multiprocessing.connection import Client
12+
13+
import numpy as np
14+
import requests
15+
import torch
16+
from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype
17+
from executorch.backends.qualcomm.serialization.qc_schema import (
18+
QnnExecuTorchBackendType,
19+
)
20+
21+
from executorch.examples.qualcomm.utils import (
22+
build_executorch_binary,
23+
get_backend_type,
24+
get_imagenet_dataset,
25+
make_output_dir,
26+
parse_skip_delegation_node,
27+
setup_common_args_and_variables,
28+
SimpleADB,
29+
)
30+
from PIL import Image
31+
from torchao.quantization.utils import compute_error
32+
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
33+
from transformers.modeling_outputs import DepthEstimatorOutput
34+
35+
HUGGING_FACE_DEPTHANYTHING_V2 = "depth-anything/Depth-Anything-V2-Small-hf"
36+
37+
def postprocess_output_and_save(output, image_height, image_width, output_image_path):
    """Convert a flattened depth prediction into a grayscale PNG.

    Args:
        output: 1-D tensor of predicted depth values containing
            image_height * image_width elements.
        image_height: height of the model input image.
        image_width: width of the model input image.
        output_image_path: destination path for the exported PNG.
    """
    image_processor = AutoImageProcessor.from_pretrained(HUGGING_FACE_DEPTHANYTHING_V2)

    post_processed_output = image_processor.post_process_depth_estimation(
        # Resize the output back to the original image dimensions and set the
        # channel dimension to 1 as depth-estimation outputs are single-channel.
        DepthEstimatorOutput(
            predicted_depth=output.reshape(1, image_height, image_width)
        ),
        target_sizes=[(image_height, image_width)],
    )

    predicted_depth = post_processed_output[0]["predicted_depth"]
    # Normalize to [0, 1]. Guard against a constant depth map: (max - min) == 0
    # would otherwise divide by zero and produce NaNs in the exported image.
    depth_range = predicted_depth.max() - predicted_depth.min()
    if depth_range == 0:
        depth = torch.zeros_like(predicted_depth)
    else:
        depth = (predicted_depth - predicted_depth.min()) / depth_range
    # Scale to 8-bit grayscale and save.
    depth = depth.detach().cpu().numpy() * 255
    depth = Image.fromarray(depth.astype("uint8"))
    depth.save(output_image_path)
58+
def main(args):
    """Compile Depth-Anything-V2-Small for QNN, execute it on device via ADB,
    and evaluate the on-device output against the source model using SQNR.

    With --dump_example_output, additionally exports depth-estimation images
    for a single example picture from both the source model and the QNN output.

    Raises:
        RuntimeError: if both --compile_only and --pre_gen_pte are set.
    """
    if args.compile_only and args.pre_gen_pte:
        raise RuntimeError("Cannot set both compile_only and pre_gen_pte as true")

    skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args)
    os.makedirs(args.artifact, exist_ok=True)

    model = AutoModelForDepthEstimation.from_pretrained(
        HUGGING_FACE_DEPTHANYTHING_V2
    ).eval()

    data_num = 100
    if args.ci:
        data_num = 1
        inputs = [(torch.rand(1, 3, 256, 256),)]
        logging.warning(
            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
        )
    elif args.dump_example_output:
        # Use a fixed example image so the exported depth maps are reproducible.
        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        image = Image.open(requests.get(url, stream=True).raw)
        image.save(os.path.join(args.artifact, "source.png"))
        image_processor = AutoImageProcessor.from_pretrained(
            HUGGING_FACE_DEPTHANYTHING_V2
        )

        pixel_values = image_processor(images=image, return_tensors="pt")[
            "pixel_values"
        ]
        inputs = [(pixel_values,)]
        data_num = 1
    else:
        inputs, _ = get_imagenet_dataset(
            dataset_path=f"{args.dataset}",
            data_size=data_num,
            image_shape=(256, 256),
        )

    # Golden outputs from the source (floating-point) model, flattened so they
    # can be compared element-wise against the raw on-device outputs.
    goldens = []
    with torch.no_grad():
        for per_input in inputs:
            predicted_depth = model(*per_input).predicted_depth
            goldens.append(predicted_depth.flatten())

    pte_filename = "depthanything_v2_small_qnn"
    # Resolve the backend up front: adb.push below needs it even when a
    # pre-generated PTE is used. (Binding it only inside the compile branch
    # raised NameError when --pre_gen_pte was set.)
    backend = get_backend_type(args.backend)
    # Skip lowering/compilation if using pre-generated PTE
    if not args.pre_gen_pte:
        # Lower to QNN; only the HTP backend is quantized (8-bit act/weight).
        quant_dtype = {
            QnnExecuTorchBackendType.kGpuBackend: None,
            QnnExecuTorchBackendType.kHtpBackend: QuantDtype.use_8a8w,
        }[backend]
        build_executorch_binary(
            model,
            inputs[0],
            args.model,
            os.path.join(args.artifact, pte_filename),
            inputs,
            skip_node_id_set=skip_node_id_set,
            skip_node_op_set=skip_node_op_set,
            quant_dtype=quant_dtype,
            backend=backend,
            shared_buffer=args.shared_buffer,
            online_prepare=args.online_prepare,
        )

    if args.compile_only:
        return

    workspace = f"/data/local/tmp/{getpass.getuser()}/executorch/{pte_filename}"
    pte_path = (
        f"{args.pre_gen_pte}/{pte_filename}.pte"
        if args.pre_gen_pte
        else f"{args.artifact}/{pte_filename}.pte"
    )

    adb = SimpleADB(
        qnn_sdk=os.getenv("QNN_SDK_ROOT"),
        build_path=f"{args.build_folder}",
        pte_path=pte_path,
        workspace=workspace,
        device_id=args.device,
        host_id=args.host,
        soc_model=args.model,
        shared_buffer=args.shared_buffer,
        target=args.target,
    )
    adb.push(inputs=inputs, backends={backend})
    adb.execute()

    # collect output data
    output_data_folder = f"{args.artifact}/outputs"
    make_output_dir(output_data_folder)

    adb.pull(host_output_path=args.artifact)

    evaluations = {
        "sqnr": [],
    }
    for i in range(data_num):
        prediction = torch.from_numpy(
            np.fromfile(
                os.path.join(output_data_folder, f"output_{i}_0.raw"), dtype=np.float32
            )
        )
        evaluations["sqnr"].append(compute_error(goldens[i], prediction))

    if args.dump_example_output:
        example_input_shape = list(inputs[0][0].shape)
        image_height, image_width = example_input_shape[-2], example_input_shape[-1]

        # Post-process source model output and export the depth estimation image
        postprocess_output_and_save(
            goldens[0],
            image_height,
            image_width,
            os.path.join(args.artifact, "golden_depth.png"),
        )
        prediction = np.fromfile(
            os.path.join(output_data_folder, "output_0_0.raw"), dtype=np.float32
        )
        # Post-process QNN output and export the depth estimation image
        postprocess_output_and_save(
            torch.from_numpy(prediction),
            image_height,
            image_width,
            os.path.join(args.artifact, "prediction_depth.png"),
        )

    # Average SQNR over all evaluated samples and report it, either to the
    # remote test harness (ip/port) or to stdout.
    evaluations["sqnr"] = sum(evaluations["sqnr"]) / data_num
    if args.ip and args.port != -1:
        with Client((args.ip, args.port)) as conn:
            conn.send(json.dumps({"sqnr": evaluations["sqnr"]}))
    else:
        print("SQNR(dB)={sqnr}".format(**evaluations))
195+
196+
if __name__ == "__main__":
    parser = setup_common_args_and_variables()
    parser.add_argument(
        "-a",
        "--artifact",
        help="path for storing generated artifacts and output by this example. Default ./depthanything_v2_small",
        default="./depthanything_v2_small",
        type=str,
    )
    parser.add_argument(
        "-d",
        "--dataset",
        help=(
            "path to the validation folder of ImageNet dataset. "
            "e.g. --dataset imagenet-mini/val "
            "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)"
        ),
        type=str,
        required=False,
    )
    parser.add_argument(
        "--dump_example_output",
        help=(
            "If specified, export the example image and post-process both the source model output "
            "and the QNN output into depth-estimation images."
        ),
        action="store_true",
        default=False,
    )

    args = parser.parse_args()
    args.validate(args)

    try:
        main(args)
    except Exception as e:
        if args.ip and args.port != -1:
            # Forward the failure to the test harness listening on ip/port.
            with Client((args.ip, args.port)) as conn:
                conn.send(json.dumps({"Error": str(e)}))
        else:
            # Re-raise the original exception so its type and traceback are
            # preserved (previously wrapped in a bare Exception, losing both).
            raise

0 commit comments

Comments
 (0)