Skip to content

Commit 88954d7

Browse files
poganesh, ziyuanguo1998, ZiyuanGuo, xieofxie
authored
[AMD] Update VitisAI Recipes for Quark 0.11 and LLM Fusion Model Support (#265)
Co-authored-by: ziyuanguo1998 <Siryuanshao@gmail.com> Co-authored-by: ziyuanguo <ziyuanguo@microsoft.com> Co-authored-by: xieofxie <xieofxie@126.com>
1 parent 5e0de74 commit 88954d7

127 files changed

Lines changed: 3763 additions & 1377 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Qwen-Qwen1.5-7B-Chat/VitisAI/Qwen1.5-7B-Chat_quark_vitisai_llm.json

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,21 @@
33
"passes": {
44
"qq": {
55
"type": "QuarkQuantization",
6-
"quant_scheme": "w_uint4_per_group_asym",
6+
"quant_scheme": "uint4_wo_128",
77
"quant_algo": "awq",
88
"dataset": "pileval_for_awq_benchmark",
99
"data_type": "bfloat16",
1010
"num_calib_data": 128,
11-
"model_export": [ "hf_format" ],
12-
"exclude_layers": [ ],
13-
"quant_config": {
14-
"name": "awq",
15-
"scaling_layers": [
16-
{
17-
"prev_op": "input_layernorm",
18-
"layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ],
19-
"inp": "self_attn.q_proj",
20-
"module2inspect": "self_attn"
21-
},
22-
{ "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" },
23-
{
24-
"prev_op": "post_attention_layernorm",
25-
"layers": [ "mlp.gate_proj", "mlp.up_proj" ],
26-
"inp": "mlp.gate_proj",
27-
"module2inspect": "mlp"
28-
},
29-
{ "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" }
30-
],
31-
"model_decoder_layers": "model.layers"
32-
}
11+
"model_export": ["hf_format"],
12+
"exclude_layers": []
3313
},
34-
"mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false }
14+
"mg": {
15+
"type": "VitisGenerateModelLLM",
16+
"recipe": "full_fusion"
17+
}
3518
},
3619
"log_severity_level": 1,
3720
"output_dir": "models/Qwen1.5-7B-Chat-vai",
38-
"cache_dir": "cache",
21+
"cache_dir": "olive_cache",
3922
"no_artifacts": true
4023
}

Qwen-Qwen1.5-7B-Chat/VitisAI/README.md

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,31 @@ This folder contains sample Olive configuration to optimize Qwen models for AMD
1414
For LLMs, follow the commands below to generate the optimized model for the VitisAI Execution Provider.
1515

1616
**Platform Support:**
17-
-**Linux with ROCm** - Supported
18-
-**Linux with CUDA** - Supported
17+
- **Linux with ROCm** - Supported on Ubuntu 24.04 and later (Ubuntu 24.04 is recommended for the best experience).
18+
- **Linux with CUDA** - Supported on Ubuntu 24.04 and later (Ubuntu 24.04 is recommended for the best experience).
1919
- **Windows with CUDA** - Supported
2020
- **Windows with CPU** - Supported (quantization will be slower)
2121
- **Windows with ROCm** - Planned for a future release
2222

2323
For more details about Quark, see the [Quark Documentation](https://quark.docs.amd.com/latest/).
2424

25-
#### **Create a Python 3.10 conda environment and run the below commands**
25+
#### **Create a Python 3.12 conda environment and run the commands below**
2626
```bash
27-
conda create -n olive python=3.10
27+
conda create -n olive python=3.12
2828
conda activate olive
2929
```
3030

31+
#### **Install Olive**
32+
33+
**Option 1: Install from PyPI**
3134
```bash
35+
pip install olive-ai[auto-opt]
36+
pip install transformers onnxruntime-genai
37+
```
38+
39+
**Option 2: Install from source**
40+
```bash
41+
git clone https://github.com/microsoft/Olive.git
3242
cd Olive
3343
pip install -e .
3444
pip install -r requirements.txt
@@ -41,7 +51,7 @@ cd olive-recipes/Qwen-Qwen1.5-7B-Chat/VitisAI
4151
pip install --force-reinstall -r requirements_vitisai_llm.txt
4252
```
4353

44-
**Note:** The requirements file automatically installs the correct `model-generate` version for your platform (1.5.0 for Linux, 1.5.1 for Windows).
54+
4555

4656
#### **Install PyTorch**
4757

@@ -60,6 +70,13 @@ pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https
6070

6171
python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
6272
```
73+
74+
**For CPU-only (Windows):**
75+
```bash
76+
pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
77+
python -c "import torch; print(torch.__version__)" # Should print 2.7.0+cpu
78+
```
79+
6380
#### **Generate optimized LLM model for VitisAI NPU**
6481
Follow the setup instructions above, then run the command below to generate the optimized LLM model for the VitisAI EP.
6582

Qwen-Qwen1.5-7B-Chat/VitisAI/requirements_vitisai_llm.txt

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,27 @@
1+
--extra-index-url=https://pypi.amd.com/olive/1.7.1/simple
12
# AMD model generation
23
--extra-index-url=https://pypi.amd.com/simple
34
accelerate
45

56
# Quark
6-
amd-quark==0.9
7+
amd-quark==0.11
78
datasets
89
evaluate
910

10-
# Platform-specific model-generate versions:
11-
# Linux: use model-generate==1.5.0 (default)
12-
# Windows: MUST use model-generate==1.5.1
13-
model-generate==1.5.0; sys_platform != 'win32'
14-
model-generate==1.5.1; sys_platform == 'win32'
11+
model-generate
1512

1613
nltk
17-
numpy
14+
numpy==1.26.4
1815

1916
# Pin onnx version
2017
onnx==1.18.0
21-
onnxruntime==1.21.1
22-
onnxruntime-genai==0.7.1
18+
onnxruntime
19+
onnxruntime-genai
20+
onnxsim
2321
optimum
22+
23+
ryzenai-dynamic-dispatch
24+
ryzenai-onnx-utils
2425
sentencepiece
2526
tabulate
26-
transformers==4.50.0
27+
transformers==4.57.6

0 commit comments

Comments
 (0)