Skip to content

Commit e6bd75d

Browse files
committed
Olive recipes for non-LLMs using QNN-GPU through QNN-EP
Recipes added for:
- OpenClip: laion/CLIP-ViT-B-32-laion2B-s34B-b79K
- OpenAI CLIP 16x16
- OpenAI CLIP 32x32
- ViT: google/vit-base-patch16-224
- ResNet-50
- BERT: google-bert/bert-base-multilingual-cased
- Intel: Intel/bert-base-uncased-mrpc
1 parent 7a6f881 commit e6bd75d

20 files changed

Lines changed: 981 additions & 0 deletions

File tree

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
{
2+
"input_model": {
3+
"type": "HfModel",
4+
"model_path": "google-bert/bert-base-multilingual-cased",
5+
"task": "feature-extraction",
6+
"io_config": {
7+
"input_names": [
8+
"input_ids",
9+
"attention_mask",
10+
"token_type_ids"
11+
],
12+
"input_shapes": [
13+
[
14+
1,
15+
128
16+
],
17+
[
18+
1,
19+
128
20+
],
21+
[
22+
1,
23+
128
24+
]
25+
],
26+
"input_types": [
27+
"int64",
28+
"int64",
29+
"int64"
30+
],
31+
"output_names": [
32+
"last_hidden_state"
33+
]
34+
}
35+
},
36+
"systems": {
37+
"target_system": {
38+
"type": "LocalSystem",
39+
"accelerators": [
40+
{
41+
"device": "gpu",
42+
"execution_providers": [
43+
"QNNExecutionProvider"
44+
]
45+
}
46+
]
47+
}
48+
},
49+
"data_configs": [
50+
{
51+
"name": "quantization_data_config",
52+
"type": "HuggingfaceContainer",
53+
"load_dataset_config": {
54+
"data_name": "facebook/xnli",
55+
"subset": "en",
56+
"split": "validation"
57+
},
58+
"pre_process_data_config": {
59+
"input_cols": [
60+
"premise"
61+
],
62+
"padding": "max_length",
63+
"max_length": 128,
64+
"max_samples": 10
65+
},
66+
"dataloader_config": {
67+
"batch_size": 1
68+
}
69+
},
70+
{
71+
"name": "evaluation_data_config",
72+
"type": "HuggingfaceContainer",
73+
"load_dataset_config": {
74+
"data_name": "facebook/xnli",
75+
"subset": "en",
76+
"split": "validation"
77+
},
78+
"pre_process_data_config": {
79+
"input_cols": [
80+
"premise"
81+
],
82+
"padding": "max_length",
83+
"max_length": 128,
84+
"max_samples": 10
85+
},
86+
"dataloader_config": {
87+
"batch_size": 1
88+
}
89+
}
90+
],
91+
"target": "target_system",
92+
"cache_dir": "cache",
93+
"output_dir": "model/google_bert",
94+
"evaluate_input_model": false
95+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "Convert to Qualcomm GPU",
3+
"oliveFile": "bert/qnn/google_bert_qnn_gpu.json",
4+
"runtime": {
5+
"autoGenerated": true,
6+
"name": "Evaluate on",
7+
"type": "enum",
8+
"displayNames": [
9+
"Qualcomm GPU",
10+
"CPU"
11+
],
12+
"path": "systems.target_system.accelerators.0.execution_providers.0",
13+
"values": [
14+
"QNNExecutionProvider",
15+
"CPUExecutionProvider"
16+
],
17+
"readOnly": false
18+
},
19+
"sections": [
20+
{
21+
"autoGenerated": true,
22+
"name": "Convert",
23+
"phase": "Conversion",
24+
"parameters": [],
25+
"toggle": {
26+
"autoGenerated": true,
27+
"name": "Convert to ONNX format",
28+
"type": "bool",
29+
"path": "passes.conversion",
30+
"actions": [
31+
[],
32+
[]
33+
],
34+
"readOnly": true
35+
}
36+
}
37+
]
38+
}

google-bert-bert-base-multilingual-cased/aitk/info.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ recipes:
23 23
- file: "bert-base-multilingual-cased_migraphx.json"
24 24
device: gpu
25 25
ep: MIGraphXExecutionProvider
26+
- file: "bert-base-multilingual-cased_qnn_gpu.json"
27+
device: gpu
28+
ep: QNNExecutionProvider
26 29
aitk:
27 30
modelInfo:
28 31
id: "huggingface/google-bert/bert-base-multilingual-cased"

google-vit-base-patch16-224/aitk/info.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ recipes:
23 23
- file: "vit-base-patch16-224_migraphx.json"
24 24
device: gpu
25 25
ep: MIGraphXExecutionProvider
26+
- file: "vit-base-patch16-224_qnn_gpu.json"
27+
device: gpu
28+
ep: QNNExecutionProvider
26 29
aitk:
27 30
modelInfo:
28 31
id: "huggingface/google/vit-base-patch16-224"
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
{
2+
"input_model": {
3+
"type": "HfModel",
4+
"model_path": "google/vit-base-patch16-224",
5+
"task": "image-classification",
6+
"io_config": {
7+
"input_names": [
8+
"pixel_values"
9+
],
10+
"input_shapes": [
11+
[
12+
1,
13+
3,
14+
224,
15+
224
16+
]
17+
],
18+
"output_names": [
19+
"output"
20+
]
21+
}
22+
},
23+
"systems": {
24+
"target_system": {
25+
"type": "LocalSystem",
26+
"accelerators": [
27+
{
28+
"device": "gpu",
29+
"execution_providers": [
30+
"QNNExecutionProvider"
31+
]
32+
}
33+
]
34+
}
35+
},
36+
"data_configs": [
37+
{
38+
"name": "quantize_data_config",
39+
"type": "HuggingfaceContainer",
40+
"user_script": "vit-base-patch16-224.py",
41+
"load_dataset_config": {
42+
"data_name": "timm/mini-imagenet",
43+
"split": "train",
44+
"streaming": true,
45+
"trust_remote_code": true
46+
},
47+
"pre_process_data_config": {
48+
"type": "dataset_pre_process",
49+
"size": 256,
50+
"cache_key": "imagedata_quantization"
51+
},
52+
"post_process_data_config": {
53+
"type": "dataset_post_process"
54+
}
55+
},
56+
{
57+
"name": "evaluation_data_config",
58+
"type": "HuggingfaceContainer",
59+
"user_script": "vit-base-patch16-224.py",
60+
"load_dataset_config": {
61+
"data_name": "timm/mini-imagenet",
62+
"split": "validation",
63+
"streaming": true,
64+
"trust_remote_code": true
65+
},
66+
"pre_process_data_config": {
67+
"type": "dataset_pre_process",
68+
"size": 1000,
69+
"cache_key": "imagedata_evaluation"
70+
},
71+
"post_process_data_config": {
72+
"type": "dataset_post_process"
73+
}
74+
}
75+
],
76+
"passes": {
77+
"conversion": {
78+
"type": "OnnxConversion",
79+
"target_opset": 20,
80+
"save_as_external_data": true
81+
},
82+
"surgery": {
83+
"type": "GraphSurgeries",
84+
"surgeries": [
85+
{
86+
"surgeon": "MatMulAddToGemm"
87+
}
88+
]
89+
}
90+
},
91+
"target": "target_system",
92+
"output_dir": "model/vit",
93+
"evaluate_input_model": false,
94+
"cache_dir": "cache"
95+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "Convert to Qualcomm GPU",
3+
"oliveFile": "vit/qnn/vit_qnn_gpu.json",
4+
"runtime": {
5+
"autoGenerated": true,
6+
"name": "Evaluate on",
7+
"type": "enum",
8+
"displayNames": [
9+
"Qualcomm GPU",
10+
"CPU"
11+
],
12+
"path": "systems.target_system.accelerators.0.execution_providers.0",
13+
"values": [
14+
"QNNExecutionProvider",
15+
"CPUExecutionProvider"
16+
],
17+
"readOnly": false
18+
},
19+
"sections": [
20+
{
21+
"autoGenerated": true,
22+
"name": "Convert",
23+
"phase": "Conversion",
24+
"parameters": [],
25+
"toggle": {
26+
"autoGenerated": true,
27+
"name": "Convert to ONNX format",
28+
"type": "bool",
29+
"path": "passes.conversion",
30+
"actions": [
31+
[],
32+
[]
33+
],
34+
"readOnly": true
35+
}
36+
}
37+
]
38+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
{
2+
"input_model": {
3+
"type": "HfModel",
4+
"model_path": "Intel/bert-base-uncased-mrpc",
5+
"task": "text-classification",
6+
"load_kwargs": {
7+
"attn_implementation": "eager"
8+
}
9+
},
10+
"systems": {
11+
"local_system": {
12+
"type": "LocalSystem",
13+
"accelerators": [
14+
{
15+
"device": "gpu",
16+
"execution_providers": [
17+
"QNNExecutionProvider"
18+
]
19+
}
20+
]
21+
}
22+
},
23+
"data_configs": [
24+
{
25+
"name": "glue_mrpc",
26+
"type": "HuggingfaceContainer",
27+
"load_dataset_config": {
28+
"data_name": "glue",
29+
"subset": "mrpc",
30+
"split": "validation"
31+
},
32+
"pre_process_data_config": {
33+
"max_length": 128,
34+
"padding": "max_length",
35+
"input_cols": [
36+
"sentence1",
37+
"sentence2"
38+
],
39+
"max_samples": 100
40+
},
41+
"dataloader_config": {
42+
"batch_size": 1
43+
}
44+
},
45+
{
46+
"name": "glue_mrpc_eval",
47+
"type": "HuggingfaceContainer",
48+
"load_dataset_config": {
49+
"data_name": "glue",
50+
"subset": "mrpc",
51+
"split": "validation"
52+
},
53+
"pre_process_data_config": {
54+
"max_length": 128,
55+
"padding": "max_length",
56+
"input_cols": [
57+
"sentence1",
58+
"sentence2"
59+
],
60+
"max_samples": 100
61+
},
62+
"dataloader_config": {
63+
"batch_size": 1
64+
}
65+
}
66+
],
67+
"passes": {
68+
"conversion": {
69+
"type": "OnnxConversion",
70+
"target_opset": 20,
71+
"save_as_external_data": true
72+
},
73+
"onnx_simplify": {
74+
"type": "OnnxPeepholeOptimizer"
75+
},
76+
"dynamic_shape_to_fixed": {
77+
"type": "DynamicToFixedShape",
78+
"dim_param": [
79+
"batch_size",
80+
"sequence_length"
81+
],
82+
"dim_value": [
83+
1,
84+
128
85+
]
86+
},
87+
"surgery": {
88+
"type": "GraphSurgeries",
89+
"surgeries": [
90+
{
91+
"surgeon": "ReplaceAttentionMaskValue"
92+
},
93+
{
94+
"surgeon": "MatMulAddToGemm"
95+
}
96+
]
97+
}
98+
},
99+
"evaluate_input_model": false,
100+
"target": "local_system",
101+
"cache_dir": "cache",
102+
"output_dir": "model/bert_ptq_qnn"
103+
}

0 commit comments

Comments
 (0)