Commit 1fc9679

[VitisAI] Update AMD NPU LLM recipes with Windows + CUDA support (microsoft#189)

Authored-by: poganeshretroyuy
Co-authored-by: Yu Yan <yu.yan@amd.com>
1 parent: b1616be

78 files changed: 883 additions, 458 deletions

Qwen-Qwen1.5-7B-Chat/VitisAI/Qwen1.5-7B-Chat_quark_vitisai_llm.json
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 "quant_scheme": "w_uint4_per_group_asym",
 "quant_algo": "awq",
 "dataset": "pileval_for_awq_benchmark",
-"data_type": "float32",
+"data_type": "bfloat16",
 "num_calib_data": 128,
 "model_export": [ "hf_format" ],
 "exclude_layers": [ ],

Qwen-Qwen1.5-7B-Chat/VitisAI/README.md
Lines changed: 24 additions & 10 deletions

@@ -13,11 +13,16 @@ This folder contains sample Olive configuration to optimize Qwen models for AMD
 
 For LLMs - follow the below commands to generate the optimized model for VitisAI Execution Provider.
 
-**Note:** We’ve tested it on Linux with ROCm and on Linux with CUDA. It is also supported on Windows with CPU, though quantization may be slower. Support for Windows with CUDA/ROCm is planned for a future release.
+**Platform Support:**
+- **Linux with ROCm** - Supported
+- **Linux with CUDA** - Supported
+- **Windows with CUDA** - Supported
+- **Windows with CPU** - Supported (quantization will be slower)
+- **Windows with ROCm** - Planned for future release
 
 For more details about quark, see the [Quark Documentation](https://quark.docs.amd.com/latest/)
 
-#### Create a Python 3.10 conda environment and run the below commands
+#### **Create a Python 3.10 conda environment and run the below commands**
 ```bash
 conda create -n olive python=3.10
 conda activate olive
@@ -29,24 +34,33 @@ pip install -e .
 pip install -r requirements.txt
 ```
 
-#### Install VitisAI LLM dependencies
+#### **Install VitisAI LLM dependencies**
 
 ```bash
-cd examples/qwen2_5/vitisai
+cd olive-recipes/Qwen-Qwen1.5-7B-Chat/VitisAI
 pip install --force-reinstall -r requirements_vitisai_llm.txt
-
-# Note: If you're running model generation on a Windows system, please uncomment the following line in requirements_vitisai_llm.txt:
-# --extra-index-url=https://pypi.amd.com/simple
-# model-generate==1.5.1
 ```
-Make sure to install the correct version of PyTorch before running quantization. If using AMD GPUs, update PyTorch to use ROCm-compatible PyTorch build. For example see the below commands
 
+**Note:** The requirements file automatically installs the correct `model-generate` version for your platform (1.5.0 for Linux, 1.5.1 for Windows).
+
+#### **Install PyTorch**
+
+Make sure to install the correct version of PyTorch before running quantization:
+
+**For AMD GPUs (ROCm):**
 ```bash
 pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1
 
 python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
 ```
-#### Generate optimized LLM model for VitisAI NPU
+
+**For NVIDIA GPUs (CUDA):**
+```bash
+pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu128
+
+python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
+```
+#### **Generate optimized LLM model for VitisAI NPU**
 Follow the above setup instructions, then run the below command to generate the optimized LLM model for VitisAI EP
 
 ```bash
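A detail behind the README's `torch.cuda.is_available()` check: it returns `True` on ROCm builds as well, because PyTorch exposes the HIP backend through the `torch.cuda` namespace. A small helper (hypothetical, not part of the recipe) that reports which accelerator stack the installed wheel actually targets:

```python
def torch_build_info() -> str:
    """Report which accelerator stack the installed PyTorch wheel targets."""
    try:
        import torch
    except ImportError:
        return "not-installed"
    # ROCm wheels set torch.version.hip; CUDA wheels set torch.version.cuda
    if getattr(torch.version, "hip", None):
        return "ROCm"
    if torch.version.cuda:
        return "CUDA"
    return "CPU-only"

print(torch_build_info())
```

Running this after the install step confirms the wheel matches your GPU vendor before starting a long quantization run.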

Qwen-Qwen1.5-7B-Chat/VitisAI/requirements_vitisai_llm.txt
Lines changed: 10 additions & 7 deletions

@@ -1,23 +1,26 @@
-
 # AMD model generation
-# Model generation on a Linux system
 --extra-index-url=https://pypi.amd.com/simple
 accelerate
 
 # Quark
 amd-quark==0.9
 datasets
 evaluate
-model-generate==1.5.0
+
+# Platform-specific model-generate versions:
+# Linux: use model-generate==1.5.0 (default)
+# Windows: MUST use model-generate==1.5.1
+model-generate==1.5.0; sys_platform != 'win32'
+model-generate==1.5.1; sys_platform == 'win32'
+
 nltk
 numpy
+
+# Pin onnx version
+onnx==1.18.0
 onnxruntime==1.21.1
 onnxruntime-genai==0.7.1
 optimum
 sentencepiece
 tabulate
 transformers==4.50.0
-
-# Uncomment the below line if running model generation on a Windows system
-# --extra-index-url=https://pypi.amd.com/simple
-# model-generate==1.5.1
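The `; sys_platform == 'win32'` suffixes are standard PEP 508 environment markers: pip evaluates them against the installing interpreter's `sys.platform` at install time, which is what removes the old manual-uncommenting step. A minimal sketch of the selection logic the markers encode (the helper function is illustrative, not part of the recipe):

```python
import sys

def select_model_generate_version(platform: str) -> str:
    """Mirror the PEP 508 markers in requirements_vitisai_llm.txt."""
    # pip's sys_platform marker is 'win32' on Windows (both 32- and 64-bit),
    # 'linux' on Linux, 'darwin' on macOS.
    return "1.5.1" if platform == "win32" else "1.5.0"

print(select_model_generate_version(sys.platform))
```

Because both lines live in the file with mutually exclusive markers, exactly one `model-generate` pin is active on any platform.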

Qwen-Qwen2-7B-Instruct/VitisAI/Qwen2-7B-Instruct_quark_vitisai_llm.json
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 "quant_scheme": "w_uint4_per_group_asym",
 "quant_algo": "awq",
 "dataset": "pileval_for_awq_benchmark",
-"data_type": "float32",
+"data_type": "bfloat16",
 "num_calib_data": 128,
 "model_export": [ "hf_format" ],
 "exclude_layers": [ ],

Qwen-Qwen2-7B-Instruct/VitisAI/README.md
Lines changed: 24 additions & 10 deletions

@@ -13,11 +13,16 @@ This folder contains sample Olive configuration to optimize Qwen models for AMD
 
 For LLMs - follow the below commands to generate the optimized model for VitisAI Execution Provider.
 
-**Note:** We’ve tested it on Linux with ROCm and on Linux with CUDA. It is also supported on Windows with CPU, though quantization may be slower. Support for Windows with CUDA/ROCm is planned for a future release.
+**Platform Support:**
+- **Linux with ROCm** - Supported
+- **Linux with CUDA** - Supported
+- **Windows with CUDA** - Supported
+- **Windows with CPU** - Supported (quantization will be slower)
+- **Windows with ROCm** - Planned for future release
 
 For more details about quark, see the [Quark Documentation](https://quark.docs.amd.com/latest/)
 
-#### Create a Python 3.10 conda environment and run the below commands
+#### **Create a Python 3.10 conda environment and run the below commands**
 ```bash
 conda create -n olive python=3.10
 conda activate olive
@@ -29,24 +34,33 @@ pip install -e .
 pip install -r requirements.txt
 ```
 
-#### Install VitisAI LLM dependencies
+#### **Install VitisAI LLM dependencies**
 
 ```bash
-cd examples/qwen2_5/vitisai
+cd olive-recipes/Qwen-Qwen2-7B-Instruct/VitisAI
 pip install --force-reinstall -r requirements_vitisai_llm.txt
-
-# Note: If you're running model generation on a Windows system, please uncomment the following line in requirements_vitisai_llm.txt:
-# --extra-index-url=https://pypi.amd.com/simple
-# model-generate==1.5.1
 ```
-Make sure to install the correct version of PyTorch before running quantization. If using AMD GPUs, update PyTorch to use ROCm-compatible PyTorch build. For example see the below commands
 
+**Note:** The requirements file automatically installs the correct `model-generate` version for your platform (1.5.0 for Linux, 1.5.1 for Windows).
+
+#### **Install PyTorch**
+
+Make sure to install the correct version of PyTorch before running quantization:
+
+**For AMD GPUs (ROCm):**
 ```bash
 pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1
 
 python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
 ```
-#### Generate optimized LLM model for VitisAI NPU
+
+**For NVIDIA GPUs (CUDA):**
+```bash
+pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu128
+
+python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
+```
+#### **Generate optimized LLM model for VitisAI NPU**
 Follow the above setup instructions, then run the below command to generate the optimized LLM model for VitisAI EP
 
 ```bash

Qwen-Qwen2-7B-Instruct/VitisAI/requirements_vitisai_llm.txt
Lines changed: 10 additions & 7 deletions

@@ -1,23 +1,26 @@
-
 # AMD model generation
-# Model generation on a Linux system
 --extra-index-url=https://pypi.amd.com/simple
 accelerate
 
 # Quark
 amd-quark==0.9
 datasets
 evaluate
-model-generate==1.5.0
+
+# Platform-specific model-generate versions:
+# Linux: use model-generate==1.5.0 (default)
+# Windows: MUST use model-generate==1.5.1
+model-generate==1.5.0; sys_platform != 'win32'
+model-generate==1.5.1; sys_platform == 'win32'
+
 nltk
 numpy
+
+# Pin onnx version
+onnx==1.18.0
 onnxruntime==1.21.1
 onnxruntime-genai==0.7.1
 optimum
 sentencepiece
 tabulate
 transformers==4.50.0
-
-# Uncomment the below line if running model generation on a Windows system
-# --extra-index-url=https://pypi.amd.com/simple
-# model-generate==1.5.1

Qwen-Qwen2.5-0.5B-Instruct/VitisAI/Qwen2.5-0.5B-Instruct_quark_vitisai_llm.json
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 "quant_scheme": "w_uint4_per_group_asym",
 "quant_algo": "awq",
 "dataset": "pileval_for_awq_benchmark",
-"data_type": "float32",
+"data_type": "bfloat16",
 "num_calib_data": 128,
 "model_export": [ "hf_format" ],
 "exclude_layers": [ ],

Qwen-Qwen2.5-0.5B-Instruct/VitisAI/README.md
Lines changed: 24 additions & 10 deletions

@@ -13,11 +13,16 @@ This folder contains sample Olive configuration to optimize Qwen models for AMD
 
 For LLMs - follow the below commands to generate the optimized model for VitisAI Execution Provider.
 
-**Note:** We’ve tested it on Linux with ROCm and on Linux with CUDA. It is also supported on Windows with CPU, though quantization may be slower. Support for Windows with CUDA/ROCm is planned for a future release.
+**Platform Support:**
+- **Linux with ROCm** - Supported
+- **Linux with CUDA** - Supported
+- **Windows with CUDA** - Supported
+- **Windows with CPU** - Supported (quantization will be slower)
+- **Windows with ROCm** - Planned for future release
 
 For more details about quark, see the [Quark Documentation](https://quark.docs.amd.com/latest/)
 
-#### Create a Python 3.10 conda environment and run the below commands
+#### **Create a Python 3.10 conda environment and run the below commands**
 ```bash
 conda create -n olive python=3.10
 conda activate olive
@@ -29,24 +34,33 @@ pip install -e .
 pip install -r requirements.txt
 ```
 
-#### Install VitisAI LLM dependencies
+#### **Install VitisAI LLM dependencies**
 
 ```bash
-cd examples/qwen2_5/vitisai
+cd olive-recipes/Qwen-Qwen2.5-0.5B-Instruct/VitisAI
 pip install --force-reinstall -r requirements_vitisai_llm.txt
-
-# Note: If you're running model generation on a Windows system, please uncomment the following line in requirements_vitisai_llm.txt:
-# --extra-index-url=https://pypi.amd.com/simple
-# model-generate==1.5.1
 ```
-Make sure to install the correct version of PyTorch before running quantization. If using AMD GPUs, update PyTorch to use ROCm-compatible PyTorch build. For example see the below commands
 
+**Note:** The requirements file automatically installs the correct `model-generate` version for your platform (1.5.0 for Linux, 1.5.1 for Windows).
+
+#### **Install PyTorch**
+
+Make sure to install the correct version of PyTorch before running quantization:
+
+**For AMD GPUs (ROCm):**
 ```bash
 pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1
 
 python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
 ```
-#### Generate optimized LLM model for VitisAI NPU
+
+**For NVIDIA GPUs (CUDA):**
+```bash
+pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu128
+
+python -c "import torch; print(torch.cuda.is_available())" # Must return `True`
+```
+#### **Generate optimized LLM model for VitisAI NPU**
 Follow the above setup instructions, then run the below command to generate the optimized LLM model for VitisAI EP
 
 ```bash

Qwen-Qwen2.5-0.5B-Instruct/VitisAI/requirements_vitisai_llm.txt
Lines changed: 10 additions & 7 deletions

@@ -1,23 +1,26 @@
-
 # AMD model generation
-# Model generation on a Linux system
 --extra-index-url=https://pypi.amd.com/simple
 accelerate
 
 # Quark
 amd-quark==0.9
 datasets
 evaluate
-model-generate==1.5.0
+
+# Platform-specific model-generate versions:
+# Linux: use model-generate==1.5.0 (default)
+# Windows: MUST use model-generate==1.5.1
+model-generate==1.5.0; sys_platform != 'win32'
+model-generate==1.5.1; sys_platform == 'win32'
+
 nltk
 numpy
+
+# Pin onnx version
+onnx==1.18.0
 onnxruntime==1.21.1
 onnxruntime-genai==0.7.1
 optimum
 sentencepiece
 tabulate
 transformers==4.50.0
-
-# Uncomment the below line if running model generation on a Windows system
-# --extra-index-url=https://pypi.amd.com/simple
-# model-generate==1.5.1

Qwen-Qwen2.5-1.5B-Instruct/VitisAI/Qwen2.5-1.5B-Instruct_quark_vitisai_llm.json
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 "quant_scheme": "w_uint4_per_group_asym",
 "quant_algo": "awq",
 "dataset": "pileval_for_awq_benchmark",
-"data_type": "float32",
+"data_type": "bfloat16",
 "num_calib_data": 128,
 "model_export": [ "hf_format" ],
 "exclude_layers": [ ],
