Skip to content

Commit c4a7d92

Browse files
Intel® OpenVINO add gemma-3-1b-it recipes (microsoft#365)
Co-authored-by: xieofxie <xieofxie@126.com>
1 parent 2bdec25 commit c4a7d92

6 files changed

Lines changed: 325 additions & 99 deletions

File tree

README.md

Lines changed: 104 additions & 99 deletions
Large diffs are not rendered by default.
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Gemma-3-1b-it Compression
2+
3+
This folder contains a sample use case of Olive to optimize the [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) model using Intel® OpenVINO tools.
4+
5+
- Intel® NPU: [Gemma 3 1b it Dynamic Shape model optimized for NPU](#gemma-3-1b-it-npu)
6+
- Intel® GPU: [Gemma 3 1b it Dynamic Shape model optimized for GPU](#gemma-3-1b-it-gpu)
7+
8+
## Quantization Workflows
9+
10+
This workflow performs quantization with Optimum Intel®. It runs the following optimization pipeline:
11+
12+
- *Hugging Face model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model*
13+
14+
### Gemma 3 1b it NPU
15+
16+
The flow in the following config file executes the above workflow producing a dynamic shape model.
17+
18+
1. [gemma_3_1b_it_context_ov_npu_config.json](gemma_3_1b_it_context_ov_npu_config.json)
19+
20+
### Gemma 3 1b it GPU
21+
22+
The flow in the following config file executes the above workflow producing a dynamic shape model.
23+
24+
1. [gemma_3_1b_it_context_ov_gpu_config.json](gemma_3_1b_it_context_ov_gpu_config.json)
25+
26+
## How to run
27+
28+
### Setup
29+
30+
Install the necessary python packages:
31+
32+
```bash
33+
python -m pip install olive-ai[openvino]
34+
python -m pip install -r requirements.txt
35+
```
36+
37+
### Run Olive config
38+
39+
The optimization techniques to run are specified in the relevant JSON config file.
40+
41+
Optimize the model using the following command:
42+
43+
```bash
44+
olive run --config <config_file.json>
45+
```
46+
47+
Example:
48+
49+
```bash
50+
olive run --config gemma_3_1b_it_context_ov_npu_config.json
51+
```
52+
53+
Alternatively, run the workflow directly from Python:
54+
55+
```python
56+
from olive import run
57+
workflow_output = run("<config_file.json>")
58+
```
59+
60+
After running the above command, the model candidates and corresponding config will be saved in the output directory.
61+
62+
### (Optional) Run Console-Based Chat Interface
63+
64+
To run ONNX OpenVINO IR Encapsulated GenAI models, please set up the latest ONNX Runtime GenAI with ONNX Runtime OpenVINO EP support.
65+
66+
The sample chat app to run is found as [model-chat.py](https://github.com/microsoft/onnxruntime-genai/blob/main/examples/python/model-chat.py) in the [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai/) GitHub repository.
67+
68+
```bash
69+
python model-chat.py -e follow_config -v -g -m models/<model_folder>/
70+
```
71+
72+
Example:
73+
74+
```bash
75+
python model-chat.py -e follow_config -v -g -m models/gemma_3_1b_it_context_ov_npu/
76+
```
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"input_model": {
3+
"type": "HfModel",
4+
"model_path": "google/gemma-3-1b-it"
5+
},
6+
"systems": {
7+
"local_system": {
8+
"type": "LocalSystem",
9+
"accelerators": [
10+
{
11+
"device": "gpu",
12+
"execution_providers": [
13+
"OpenVINOExecutionProvider"
14+
]
15+
}
16+
]
17+
}
18+
},
19+
"passes": {
20+
"optimum_convert": {
21+
"type": "OpenVINOOptimumConversion",
22+
"extra_args": {
23+
"device": "gpu"
24+
},
25+
"ov_quant_config": {
26+
"weight_format": "int4",
27+
"group_size": 128,
28+
"ratio": 0.8,
29+
"trust_remote_code": true
30+
}
31+
},
32+
"encapsulation": {
33+
"type": "OpenVINOEncapsulation",
34+
"target_device": "gpu",
35+
"keep_ov_dynamic_dims": true,
36+
"ov_version": "2026.1",
37+
"reuse_cache": true,
38+
"genai_config_override": {
39+
"model": {
40+
"context_length": 32768
41+
},
42+
"search": {
43+
"do_sample": true,
44+
"max_length": 32768,
45+
"repetition_penalty": 1.1,
46+
"temperature": 1.0,
47+
"top_k": 64,
48+
"top_p": 0.95
49+
}
50+
}
51+
}
52+
},
53+
"search_strategy": false,
54+
"target": "local_system",
55+
"evaluate_input_model": false,
56+
"output_dir": "models/gemma_3_1b_it_context_ov_gpu"
57+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{
2+
"input_model": {
3+
"type": "HfModel",
4+
"model_path": "google/gemma-3-1b-it"
5+
},
6+
"systems": {
7+
"local_system": {
8+
"type": "LocalSystem",
9+
"accelerators": [
10+
{
11+
"device": "npu",
12+
"execution_providers": [
13+
"OpenVINOExecutionProvider"
14+
]
15+
}
16+
]
17+
}
18+
},
19+
"passes": {
20+
"optimum_convert": {
21+
"type": "OpenVINOOptimumConversion",
22+
"extra_args": {
23+
"device": "npu"
24+
},
25+
"ov_quant_config": {
26+
"weight_format": "int4",
27+
"group_size": 128,
28+
"ratio": 1,
29+
"dataset": "wikitext2",
30+
"sym": true,
31+
"trust_remote_code": true,
32+
"sensitivity_metric": "weight_quantization_error"
33+
}
34+
},
35+
"encapsulation": {
36+
"type": "OpenVINOEncapsulation",
37+
"target_device": "npu",
38+
"keep_ov_dynamic_dims": true,
39+
"ov_version": "2026.1",
40+
"reuse_cache": true,
41+
"genai_config_override": {
42+
"model": {
43+
"context_length": 32768,
44+
"decoder": {
45+
"session_options": {
46+
"provider_options": [
47+
{
48+
"OpenVINO": {
49+
"device_type": "NPU",
50+
"enable_causallm": "True",
51+
"load_config": "{\"NPU\":{\"MAX_PROMPT_LEN\":\"4096\",\"MIN_RESPONSE_LEN\":\"128\"}}"
52+
}
53+
}
54+
]
55+
}
56+
}
57+
},
58+
"search": {
59+
"do_sample": true,
60+
"max_length": 32768,
61+
"repetition_penalty": 1.1,
62+
"temperature": 1.0,
63+
"top_k": 64,
64+
"top_p": 0.95
65+
}
66+
}
67+
}
68+
},
69+
"search_strategy": false,
70+
"target": "local_system",
71+
"evaluate_input_model": false,
72+
"output_dir": "models/gemma_3_1b_it_context_ov_npu"
73+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
keywords:
2+
openvino
3+
olive
4+
arch: gemma
5+
recipes:
6+
- file: "gemma_3_1b_it_context_ov_npu_config.json"
7+
devices:
8+
- npu
9+
ep: OpenVINOExecutionProvider
10+
- file: "gemma_3_1b_it_context_ov_gpu_config.json"
11+
devices:
12+
- gpu
13+
ep: OpenVINOExecutionProvider
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
optimum-intel
2+
transformers

0 commit comments

Comments
 (0)