
Commit f4ab841

Shiva Chilukamari authored and HCKTest committed
Added Mixed Precision Quantization for decoder
1 parent 68ebcec commit f4ab841

6 files changed: 231 additions & 24 deletions


sam2.1-hiera-small/QNN/README.md

Lines changed: 8 additions & 3 deletions
@@ -19,7 +19,7 @@ Model compilation using QNN Execution Provider requires a Python environment wit
 pip install olive-ai onnxruntime-qnn torch torchvision transformers
 ```

-Replace `/path/to/qnn/env/bin` in [sam21_vision_encoder_qnn_ctx.json](sam21_vision_encoder_qnn_ctx.json) and [sam21_mask_decoder_qnn_ctx.json](sam21_mask_decoder_qnn_ctx.json) with the path to the directory containing your QNN environment's Python executable. This path can be found by running the following command in the environment:
+Replace `/path/to/qnn/env/bin` in [sam21_vision_encoder_qnn_ctx.json](sam21_vision_encoder_qnn_ctx.json), [sam21_mask_decoder_qnn_fp16_ctx.json](sam21_mask_decoder_qnn_fp16_ctx.json) and [sam21_mask_decoder_qnn_mp_ctx.json](sam21_mask_decoder_qnn_mp_ctx.json) with the path to the directory containing your QNN environment's Python executable. This path can be found by running the following command in the environment:

 ```bash
 # Linux
@@ -45,9 +45,14 @@ For Encoder Model:
 olive run --config sam21_vision_encoder_qnn_ctx.json
 ```

-For Decoder Model:
+For Decoder Model with FP16 quantization:
 ```bash
-olive run --config sam21_mask_decoder_qnn_ctx.json
+olive run --config sam21_mask_decoder_qnn_fp16_ctx.json
+```
+
+For Decoder Model with Mixed Precision quantization:
+```bash
+olive run --config sam21_mask_decoder_qnn_mp_ctx.json
 ```

 > ⚠️ If optimization fails during context binary generation, rerun the command. The process will resume from the last completed step.
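Aside: the lookup command the README refers to sits outside this hunk. The value to substitute for `/path/to/qnn/env/bin` is just the directory holding the environment's Python executable; a minimal sketch of one way to print it (my illustration, not the README's command):

```python
# Minimal sketch: print the directory of the running Python executable.
# Run inside the activated QNN environment; the printed path is the
# value to paste into the three *_ctx.json configs.
import os
import sys

print(os.path.dirname(sys.executable))
```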

sam2.1-hiera-small/QNN/config.py

Lines changed: 2 additions & 2 deletions
@@ -10,5 +10,5 @@ class ModelConfig:
     ve_input_name = "pixel_values"
     ve_channel_size = 3
     ve_sample_size = 1024
-    md_input_names = ("image_embeddings", "high_res_features1", "high_res_features2", "coords.1", "labels")
-    ms_input_shapes = ((1, 256, 64, 64), (1, 32, 256, 256), (1, 64, 128, 128), (1, 5, 2), (1, 5))
+    md_input_names = ("image_embeddings", "high_res_features1", "high_res_features2", "point_coords", "point_labels", "mask_input", "has_mask_input")
+    ms_input_shapes = ((1, 256, 64, 64), (1, 32, 256, 256), (1, 64, 128, 128), (1, 5, 2), (1, 5), (1, 1, 256, 256), [1])
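The two tuples pair positionally, so the decoder now declares seven inputs: three encoder feature maps, five prompt-point slots with their labels, a 1×1×256×256 mask prompt, and a one-element mask flag. A hedged sketch of how the pairing might be consumed (the zip loop is my illustration; the names and shapes are from the diff):

```python
# Illustrative only: pair each decoder input name with its shape and
# build float32 dummy tensors, mirroring what a dummy data loader
# could do with the ModelConfig fields from this commit.
import numpy as np

md_input_names = ("image_embeddings", "high_res_features1", "high_res_features2",
                  "point_coords", "point_labels", "mask_input", "has_mask_input")
ms_input_shapes = ((1, 256, 64, 64), (1, 32, 256, 256), (1, 64, 128, 128),
                   (1, 5, 2), (1, 5), (1, 1, 256, 256), [1])

dummy_inputs = {name: np.zeros(shape, dtype=np.float32)
                for name, shape in zip(md_input_names, ms_input_shapes)}
print({name: arr.shape for name, arr in dummy_inputs.items()})
```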

sam2.1-hiera-small/QNN/info.yml

Lines changed: 6 additions & 1 deletion
@@ -3,7 +3,12 @@ keywords:
   qnn
 arch: hiera
 recipes:
-  - file: "sam21_mask_decoder_qnn_ctx.json"
+  - file: "sam21_mask_decoder_qnn_fp16_ctx.json"
+    device:
+      - npu
+    ep: QNNExecutionProvider
+
+  - file: "sam21_mask_decoder_qnn_mp_ctx.json"
     device:
       - npu
     ep: QNNExecutionProvider

sam2.1-hiera-small/QNN/sam21_mask_decoder_qnn_ctx.json renamed to sam2.1-hiera-small/QNN/sam21_mask_decoder_qnn_fp16_ctx.json

Lines changed: 9 additions & 18 deletions
@@ -21,23 +21,14 @@
     },
     "data_configs": [
         {
-            "name": "dummy_data_config",
-            "type": "DummyDataContainer",
+            "name": "latency_data_config",
+            "user_script": "user_script.py",
             "load_dataset_config": {
-                "input_shapes": [
-                    [
-                        1,
-                        3,
-                        1024,
-                        1024
-                    ]
-                ],
-                "input_names": [
-                    "input.1"
-                ],
-                "input_types": [
-                    "float32"
-                ]
+                "type": "local_dataset"
+            },
+            "dataloader_config": {
+                "type": "md_data_loader",
+                "batch_size": 1
             }
         }
     ],
@@ -62,7 +53,7 @@
     "log_severity_level": 0,
     "ort_log_severity_level": 1,
     "ort_py_log_severity_level": 1,
-    "cache_dir": "cache_decoder",
-    "output_dir": "model/decoder",
+    "cache_dir": "cache_decoder_fp16",
+    "output_dir": "model/decoder_fp16",
     "no_artifacts": true
 }
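Functionally, this swaps the dummy latency input (a 1×3×1024×1024 tensor named `input.1`, apparently carried over from the encoder config) for the decoder-shaped random loader that user_script.py registers as `md_data_loader`. To drive the same workflow from Python instead of the CLI, olive-ai exposes a run entry point; a small sketch, assuming the config sits in the working directory:

```python
# Sketch (assumes olive-ai's Python entry point): equivalent to
# `olive run --config sam21_mask_decoder_qnn_fp16_ctx.json`.
from olive.workflows import run as olive_run

olive_run("sam21_mask_decoder_qnn_fp16_ctx.json")
```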
sam2.1-hiera-small/QNN/sam21_mask_decoder_qnn_mp_ctx.json (new file)

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
+{
+    "input_model": {
+        "type": "ONNXModel",
+        "model_path": "sam21_mask_decoder.onnx"
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu",
+                    "execution_providers": [
+                        "CUDAExecutionProvider"
+                    ]
+                }
+            ]
+        },
+        "target_system": {
+            "type": "PythonEnvironment",
+            "python_environment_path": "/path/to/qnn/env/bin",
+            "accelerators": [
+                {
+                    "execution_providers": [
+                        "QNNExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "data_configs": [
+        {
+            "name": "latency_data_config",
+            "user_script": "user_script.py",
+            "load_dataset_config": {
+                "type": "local_dataset"
+            },
+            "dataloader_config": {
+                "type": "md_data_loader",
+                "batch_size": 1
+            }
+        },
+        {
+            "name": "quantize_data_config",
+            "user_script": "user_script.py",
+            "load_dataset_config": {
+                "type": "local_dataset"
+            },
+            "dataloader_config": {
+                "type": "md_quantize_data_loader",
+                "data_num": 200,
+                "point_p": 0.3,
+                "mask_p": 0.2
+            }
+        }
+    ],
+    "passes": {
+        "f16": {
+            "type": "OnnxFloatToFloat16",
+            "keep_io_types": true,
+            "save_as_external_data": true,
+            "node_include_list": ["/Add", "/Concat", "/Concat_1", "/Gather",
+                                  "/Div", "/Reshape", "/ScatterND", "/Gather_2",
+                                  "/Div_1", "/Reshape_1", "/ScatterND_1", "/Mul_6",
+                                  "/Sub", "/MatMul", "/Mul_7", "/Sin", "/Cos",
+                                  "/Concat_6", "/Equal_6", "/Unsqueeze_6", "/Where_6",
+                                  "/Equal_7", "/Unsqueeze_7", "/Add_1", "/Where_7",
+                                  "/Equal_8", "/Unsqueeze_8", "/Add_2", "/Where_8",
+                                  "/Equal_9", "/Unsqueeze_9", "/Add_3", "/Where_9",
+                                  "/Equal_10", "/Unsqueeze_10", "/Add_4", "/Where_10"]
+        },
+        "sq": {
+            "type": "OnnxStaticQuantization",
+            "data_config": "quantize_data_config",
+            "activation_type": "uint8",
+            "precision": "uint8",
+            "calibration_providers": [ "CUDAExecutionProvider" ],
+            "calibrate_method": "MinMax",
+            "quant_preprocess": true,
+            "op_types_to_exclude": ["Cast"],
+            "save_as_external_data": true,
+            "nodes_to_exclude": ["/Add", "/Concat", "/Concat_1", "/Gather",
+                                 "/Div", "/Reshape", "/ScatterND", "/Gather_2",
+                                 "/Div_1", "/Reshape_1", "/ScatterND_1", "/Mul_6",
+                                 "/Sub", "/MatMul", "/Mul_7", "/Sin", "/Cos",
+                                 "/Concat_6", "/Equal_6", "/Unsqueeze_6", "/Where_6",
+                                 "/Equal_7", "/Unsqueeze_7", "/Add_1", "/Where_7",
+                                 "/Equal_8", "/Unsqueeze_8", "/Add_2", "/Where_8",
+                                 "/Equal_9", "/Unsqueeze_9", "/Add_3", "/Where_9",
+                                 "/Equal_10", "/Unsqueeze_10", "/Add_4", "/Where_10"]
+        },
+        "cb": {
+            "type": "EPContextBinaryGenerator",
+            "provider_options": {
+                "htp_performance_mode": "burst",
+                "htp_graph_finalization_optimization_mode": "3",
+                "offload_graph_io_quantization": "0",
+                "soc_model": "60"
+            },
+            "weight_sharing": false
+        }
+    },
+    "host": "local_system",
+    "target": "target_system",
+    "log_severity_level": 0,
+    "ort_log_severity_level": 1,
+    "ort_py_log_severity_level": 1,
+    "cache_dir": "cache_decoder_mp",
+    "output_dir": "model/decoder_mp",
+    "no_artifacts": true
+}
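The mixed-precision recipe hinges on the two node lists being complements: `node_include_list` pins the prompt and positional-encoding nodes (`/Sin`, `/Cos`, the `/Where_*` chains, and so on) to FP16, while `nodes_to_exclude` keeps exactly those nodes out of the uint8 static quantization, so every node lands in exactly one precision. A hypothetical sanity check (my addition, not part of the commit):

```python
# Hypothetical check: the FP16 pass's node_include_list and the
# quantization pass's nodes_to_exclude must name the same nodes,
# or a node could end up converted twice or quantized by mistake.
import json

with open("sam21_mask_decoder_qnn_mp_ctx.json") as f:
    cfg = json.load(f)

fp16_nodes = set(cfg["passes"]["f16"]["node_include_list"])
excluded = set(cfg["passes"]["sq"]["nodes_to_exclude"])
assert fp16_nodes == excluded, fp16_nodes ^ excluded
print(f"{len(fp16_nodes)} nodes kept in FP16 and excluded from uint8 quantization")
```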

sam2.1-hiera-small/QNN/user_script.py

Lines changed: 96 additions & 0 deletions
@@ -52,6 +52,21 @@ def __init__(self, total):
         self.finish_load()


+class MdDecoderGeneratedDataLoader(BaseDataLoader):
+    def __init__(self, total, point_p, mask_p):
+        super().__init__(total)
+        md_generate_quant_data(total, point_p, mask_p)
+        self.data_files = [
+            os.path.join(ModelConfig.data_dir, f.name)
+            for f in os.scandir(ModelConfig.data_dir)
+            if "points.npz" in f.name
+        ]
+        self.data_files.sort()
+        for f in self.data_files:
+            self.load(f)
+        self.finish_load()
+
+
 class RandomDataLoader:
     def __init__(self, create_inputs_func, batch_size, torch_dtype):
         self.create_input_func = create_inputs_func
@@ -102,6 +117,11 @@ def md_data_loader(dataset, batch_size, *args, **kwargs):
     return RandomDataLoader(md_inputs, batch_size, torch.float32)


+@Registry.register_dataloader()
+def md_quantize_data_loader(dataset, data_num, point_p, mask_p, *args, **kwargs):
+    return MdDecoderGeneratedDataLoader(data_num, point_p, mask_p)
+
+
 def ve_generate_quant_data(num_samples):
     p = Path(ModelConfig.data_dir)
     if p.is_dir() and (len([f for f in p.glob("*images.npz")]) >= num_samples):
@@ -118,3 +138,79 @@ def ve_generate_quant_data(num_samples):
         inputs = processor(image, return_tensors="pt")
         pixel_values = inputs["pixel_values"].detach().cpu().numpy()
         np.savez(f"{ModelConfig.data_dir}/input_{i}_images.npz", input=pixel_values)
+
+
+def get_inputs(sample, point_p = 0.0, mask_p = 0.0):
+    inputs = {}
+    segments_info = sample['segments_info']
+    segment_info = np.random.choice(segments_info)
+    box = segment_info['bbox']
+    p1 = [box[0], box[1]]
+    p2 = [p1[0] + box[2], p1[1] + box[3]]
+    p = np.mean([p1, p2], axis = 0)
+
+    if np.random.random() < point_p:
+        inputs['point_coords'] = np.concatenate([[p], np.zeros((4, 2))])[None, :]
+        inputs['point_labels'] = np.concatenate([[1], -np.ones(4)])[None, :]
+    else:
+        inputs['point_coords'] = np.concatenate([[p1, p2], np.zeros((3, 2))])[None, :]
+        inputs['point_labels'] = np.concatenate([[2, 3], -np.ones(3)])[None, :]
+
+    if np.random.random() < mask_p:
+        pil_mask = sample['label']
+        w, h = pil_mask.size
+        masks = np.array(pil_mask.resize((256, 256)))
+        mask_point = masks[int(p[1]*256/h), int(p[0]*256/w)]
+        mask = (masks == mask_point).all(axis = -1)
+        inputs['mask_input'] = mask[None, None, :]
+        inputs['has_mask_input'] = np.array([1])
+    else:
+        inputs['mask_input'] = np.zeros((1, 1, 256, 256))
+        inputs['has_mask_input'] = np.array([0])
+    return inputs
+
+
+def md_generate_quant_data(num_samples, point_p, mask_p):
+    p = Path(ModelConfig.data_dir)
+    if p.is_dir() and (len([f for f in p.glob("*points.npz")]) >= num_samples):
+        return
+    from hydra import initialize
+    from hydra.core.global_hydra import GlobalHydra
+    from sam2.build_sam import build_sam2
+    from generate_model import model_weights_url, checkpoint, model_config_url, model_cfg
+    from generate_model import download_file, SAM2Encoder
+
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+
+
+    processor = Sam2Processor.from_pretrained(ModelConfig.model_name)
+    dataset = load_dataset("nielsr/coco-panoptic-val2017")
+    dataset = dataset["train"]
+    os.makedirs(ModelConfig.data_dir, exist_ok=True)
+
+    download_file(model_weights_url, checkpoint)
+    download_file(model_config_url, model_cfg)
+
+    GlobalHydra.instance().clear()
+    initialize(config_path="./", job_name="sam2_inference", version_base=None)
+    sam2_model = build_sam2(model_cfg, checkpoint, device="cpu")
+
+    encoder = SAM2Encoder(sam2_model).to(device)
+
+    for i, sample in enumerate(dataset):
+        if i >= num_samples:
+            break
+        image = sample['image']
+        inputs = get_inputs(sample, point_p, mask_p)
+        process_inputs = processor(image, input_points = [inputs['point_coords']], input_labels = [inputs['point_labels']], return_tensors="pt")
+        image_embed, high_feat_1, high_feat2 = encoder(input = process_inputs['pixel_values'].to(device))
+        quant_inputs = {}
+        quant_inputs['image_embeddings'] = image_embed.detach().cpu().numpy().astype(np.float32)
+        quant_inputs['high_res_features1'] = high_feat_1.detach().cpu().numpy().astype(np.float32)
+        quant_inputs['high_res_features2'] = high_feat2.detach().cpu().numpy().astype(np.float32)
+        quant_inputs['point_coords'] = process_inputs['input_points'].detach().cpu().numpy()[0].astype(np.float32)
+        quant_inputs['point_labels'] = process_inputs['input_labels'].detach().cpu().numpy()[0].astype(np.float32)
+        quant_inputs['mask_input'] = inputs['mask_input'].astype(np.float32)
+        quant_inputs['has_mask_input'] = inputs['has_mask_input'].astype(np.float32)
+
+        np.savez(f"{ModelConfig.data_dir}/input_{i}_points.npz", **quant_inputs)
