# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import getpass
import json
import logging
import os
from multiprocessing.connection import Client

import numpy as np
import requests
import torch
from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype
from executorch.backends.qualcomm.serialization.qc_schema import (
    QnnExecuTorchBackendType,
)

from executorch.examples.qualcomm.utils import (
    build_executorch_binary,
    get_backend_type,
    get_imagenet_dataset,
    make_output_dir,
    parse_skip_delegation_node,
    setup_common_args_and_variables,
    SimpleADB,
)
from PIL import Image
from torchao.quantization.utils import compute_error
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
from transformers.modeling_outputs import DepthEstimatorOutput

# Hugging Face hub id of the small Depth-Anything V2 checkpoint used throughout.
HUGGING_FACE_DEPTHANYTHING_V2 = "depth-anything/Depth-Anything-V2-Small-hf"
def postprocess_output_and_save(output, image_height, image_width, output_image_path):
    """Render a flat depth prediction as an 8-bit grayscale image and save it.

    Args:
        output: 1-D tensor of predicted depth values; must contain exactly
            image_height * image_width elements (it is reshaped below).
        image_height: height of the model input image, in pixels.
        image_width: width of the model input image, in pixels.
        output_image_path: destination path for the rendered image file.
    """
    image_processor = AutoImageProcessor.from_pretrained(HUGGING_FACE_DEPTHANYTHING_V2)

    post_processed_output = image_processor.post_process_depth_estimation(
        # Depth estimation outputs are single-channel, so restore the
        # (1, H, W) layout the post-processing helper expects.
        DepthEstimatorOutput(
            predicted_depth=output.reshape(1, image_height, image_width)
        ),
        target_sizes=[(image_height, image_width)],
    )

    predicted_depth = post_processed_output[0]["predicted_depth"]
    # Min-max normalize to [0, 1] so the full grayscale range is used.
    depth = (predicted_depth - predicted_depth.min()) / (
        predicted_depth.max() - predicted_depth.min()
    )
    depth = depth.detach().cpu().numpy() * 255
    depth = Image.fromarray(depth.astype("uint8"))
    depth.save(output_image_path)
| 57 | + |
def main(args):
    """Lower Depth-Anything-V2-Small to a QNN .pte, run it on-device, and report SQNR.

    Depending on the flags, inputs come from random data (--ci), a single COCO
    example image (--dump_example_output), or an ImageNet validation folder
    (--dataset). Golden outputs are computed with the FP32 model on host and
    compared against on-device results via SQNR.

    Raises:
        RuntimeError: if both --compile_only and --pre_gen_pte are set.
    """
    if args.compile_only and args.pre_gen_pte:
        raise RuntimeError("Cannot set both compile_only and pre_gen_pte as true")

    skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args)
    os.makedirs(args.artifact, exist_ok=True)

    model = AutoModelForDepthEstimation.from_pretrained(
        HUGGING_FACE_DEPTHANYTHING_V2
    ).eval()

    data_num = 100
    if args.ci:
        data_num = 1
        inputs = [(torch.rand(1, 3, 256, 256),)]
        logging.warning(
            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
        )
    elif args.dump_example_output:
        # Use a single well-known COCO validation image so the exported depth
        # maps are reproducible and easy to eyeball.
        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        image = Image.open(requests.get(url, stream=True).raw)
        image.save(os.path.join(args.artifact, "source.png"))
        image_processor = AutoImageProcessor.from_pretrained(
            HUGGING_FACE_DEPTHANYTHING_V2
        )

        pixel_values = image_processor(images=image, return_tensors="pt")[
            "pixel_values"
        ]
        inputs = [(pixel_values,)]
        data_num = 1
    else:
        inputs, _ = get_imagenet_dataset(
            dataset_path=f"{args.dataset}",
            data_size=data_num,
            image_shape=(256, 256),
        )

    # Golden (FP32, on-host) outputs used as the SQNR reference.
    goldens = []
    with torch.no_grad():
        for per_input in inputs:
            predicted_depth = model(*per_input).predicted_depth
            goldens.append(predicted_depth.flatten())

    pte_filename = "depthanything_v2_small_qnn"
    # Resolve the backend unconditionally: adb.push below needs it even when
    # compilation is skipped via --pre_gen_pte. (Previously it was assigned
    # only inside the compile branch, causing a NameError with --pre_gen_pte.)
    backend = get_backend_type(args.backend)
    # Skip lowering/compilation if using pre-generated PTE
    if not args.pre_gen_pte:
        # Lower to QNN. HTP runs 8-bit quantized; GPU runs floating point.
        quant_dtype = {
            QnnExecuTorchBackendType.kGpuBackend: None,
            QnnExecuTorchBackendType.kHtpBackend: QuantDtype.use_8a8w,
        }[backend]
        build_executorch_binary(
            model,
            inputs[0],
            args.model,
            os.path.join(args.artifact, pte_filename),
            inputs,
            skip_node_id_set=skip_node_id_set,
            skip_node_op_set=skip_node_op_set,
            quant_dtype=quant_dtype,
            backend=backend,
            shared_buffer=args.shared_buffer,
            online_prepare=args.online_prepare,
        )

    if args.compile_only:
        return

    workspace = f"/data/local/tmp/{getpass.getuser()}/executorch/{pte_filename}"
    pte_path = (
        f"{args.pre_gen_pte}/{pte_filename}.pte"
        if args.pre_gen_pte
        else f"{args.artifact}/{pte_filename}.pte"
    )

    adb = SimpleADB(
        qnn_sdk=os.getenv("QNN_SDK_ROOT"),
        build_path=f"{args.build_folder}",
        pte_path=pte_path,
        workspace=workspace,
        device_id=args.device,
        host_id=args.host,
        soc_model=args.model,
        shared_buffer=args.shared_buffer,
        target=args.target,
    )
    adb.push(inputs=inputs, backends={backend})
    adb.execute()

    # collect output data
    output_data_folder = f"{args.artifact}/outputs"
    make_output_dir(output_data_folder)

    adb.pull(host_output_path=args.artifact)

    evaluations = {
        "sqnr": [],
    }
    for i in range(data_num):
        # On-device outputs are dumped as raw float32 buffers, one per input.
        prediction = torch.from_numpy(
            np.fromfile(
                os.path.join(output_data_folder, f"output_{i}_0.raw"), dtype=np.float32
            )
        )
        evaluations["sqnr"].append(compute_error(goldens[i], prediction))

    if args.dump_example_output:
        example_input_shape = list(inputs[0][0].shape)
        image_height, image_width = example_input_shape[-2], example_input_shape[-1]

        # Post-process source model output and export the depth estimation image
        postprocess_output_and_save(
            goldens[0],
            image_height,
            image_width,
            os.path.join(args.artifact, "golden_depth.png"),
        )
        prediction = np.fromfile(
            os.path.join(output_data_folder, "output_0_0.raw"), dtype=np.float32
        )
        # Post-process QNN output and export the depth estimation image
        postprocess_output_and_save(
            torch.from_numpy(prediction),
            image_height,
            image_width,
            os.path.join(args.artifact, "prediction_depth.png"),
        )

    evaluations["sqnr"] = sum(evaluations["sqnr"]) / data_num
    if args.ip and args.port != -1:
        # CI mode: ship the metric back to the listening host process.
        with Client((args.ip, args.port)) as conn:
            conn.send(json.dumps({"sqnr": evaluations["sqnr"]}))
    else:
        print("SQNR(dB)={sqnr}".format(**evaluations))
| 196 | + |
if __name__ == "__main__":
    parser = setup_common_args_and_variables()
    parser.add_argument(
        "-a",
        "--artifact",
        help="path for storing generated artifacts and output by this example. Default ./depthanything_v2_small",
        default="./depthanything_v2_small",
        type=str,
    )
    parser.add_argument(
        "-d",
        "--dataset",
        help=(
            "path to the validation folder of ImageNet dataset. "
            "e.g. --dataset imagenet-mini/val "
            "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)"
        ),
        type=str,
        required=False,
    )
    parser.add_argument(
        "--dump_example_output",
        help=(
            "If specified, export the example image and post-process both the source model output "
            "and the QNN output into depth-estimation images."
        ),
        action="store_true",
        default=False,
    )

    args = parser.parse_args()
    args.validate(args)

    try:
        main(args)
    except Exception as e:
        if args.ip and args.port != -1:
            # CI mode: forward the failure to the listening host process
            # instead of only printing a traceback on the device runner.
            with Client((args.ip, args.port)) as conn:
                conn.send(json.dumps({"Error": str(e)}))
        else:
            # Re-raise as-is to preserve the original exception type and
            # traceback (wrapping in a new Exception would discard both).
            raise