
Commit 5eb2a5a

gemma-3-1b-it (microsoft#150)

Authored by vortex-captain and Yi Ren.
Co-authored-by: Yi Ren <reny@microsoft.com>

1 parent 0149ee8 commit 5eb2a5a

3 files changed: 30 additions & 0 deletions

File tree

google-gemma/olive/README.md (8 additions & 0 deletions)

The exported ONNX model is saved in the `output_model` folder.

To run the ONNX GenAI model, please set up the latest ONNXRuntime GenAI.

The sample chat app to run is found as [model-chat.py](https://github.com/microsoft/onnxruntime-genai/blob/main/examples/python/model-chat.py) in the [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai/) GitHub repository.

## google/gemma-3-1b-it

```bash
python -m pip install -r requirements.txt
# Use the following command to export the model using Olive with CPUExecutionProvider at FP32 precision:
olive run --config gemma-3-1b-it_model_builder_cpu_fp32.json
```
google-gemma/olive/gemma-3-1b-it_model_builder_cpu_fp32.json (18 additions & 0 deletions)

```json
{
    "input_model": {
        "type": "HfModel",
        "model_path": "google/gemma-3-1b-it",
        "task": "text-generation"
    },
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ]
        }
    },
    "engine": { "target": "local_system" },
    "passes": {
        "builder": { "type": "ModelBuilder", "precision": "fp32" }
    }
}
```
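As a quick sanity check (an illustrative snippet, not part of the commit), the recipe above is plain JSON, so its key fields can be verified with Python's standard library before handing it to Olive:

```python
import json

# The Olive recipe content from the commit, inlined here for the check.
RECIPE = """
{
    "input_model": {
        "type": "HfModel",
        "model_path": "google/gemma-3-1b-it",
        "task": "text-generation"
    },
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ]
        }
    },
    "engine": { "target": "local_system" },
    "passes": {
        "builder": { "type": "ModelBuilder", "precision": "fp32" }
    }
}
"""

config = json.loads(RECIPE)
accelerator = config["systems"]["local_system"]["accelerators"][0]
print(config["input_model"]["model_path"])       # google/gemma-3-1b-it
print(accelerator["execution_providers"][0])     # CPUExecutionProvider
print(config["passes"]["builder"]["precision"])  # fp32
```

A parse failure or a wrong field here would surface a typo before a full Olive run.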

google-gemma/olive/info.yml (4 additions & 0 deletions)

```yaml
recipes:
  - file: "README.md"
    device: gpu
    ep: CUDAExecutionProvider
  - name: gemma-3-1b-it_model_builder_cpu_FP32
    file: gemma-3-1b-it_model_builder_cpu_fp32.json
    devices: cpu
    eps: CPUExecutionProvider
```
