Skip to content

Commit 8caee1c

Browse files
jambayk and Copilot authored
Fix optimize CLI to set system EP and device (microsoft#2418)
## Describe your changes Fix the `olive optimize` CLI to properly configure the system execution provider and device in the generated workflow config. - `_update_system_config` now creates the `local_system` with the specified execution provider and optional device. Previously only the QNN AOT case was handled, leaving the system config empty. - When model builder is used as the exporter, the `OnnxFloatToFloat16` pass is skipped since model builder already produces the model in fp16. - `test_optimize_cli_pass_list` now verifies the system EP and device are correctly set in the generated config for all test cases. ## Checklist before requesting a review - [x] Add unit tests for this change. - [x] Make sure all tests can pass. - [ ] Update documents if necessary. - [x] Lint and apply fixes to your code by running `lintrunner -a` - [x] Is this a user-facing change? If yes, give a description of this change to be included in the release notes. - `olive optimize` now correctly sets the target system execution provider and device. ## (Optional) Issue link --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 40d860e commit 8caee1c

2 files changed

Lines changed: 51 additions & 1 deletion

File tree

olive/cli/optimize.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,19 @@ def _update_system_config(self, config: dict[str, Any]):
294294
"""Update system configuration based on provider and device."""
295295
provider = ExecutionProvider(self.args.provider)
296296

297+
accelerator = {"execution_providers": [provider.value]}
298+
if self.args.device:
299+
accelerator["device"] = self.args.device
300+
if self.args.memory is not None:
301+
accelerator["memory"] = self.args.memory
302+
303+
config["systems"]["local_system"] = {
304+
"type": "LocalSystem",
305+
"accelerators": [accelerator],
306+
}
307+
308+
config["target"] = "local_system"
309+
297310
if provider == ExecutionProvider.QNNExecutionProvider and self.args.enable_aot:
298311
config["systems"]["qnn_system"] = {
299312
"type": "PythonEnvironment",
@@ -622,7 +635,7 @@ def _get_onnx_blockwise_rtn_quantization_pass_config(self) -> dict[str, Any]:
622635
def _enable_onnx_float_to_float16_pass(self) -> bool:
623636
"""Return true if condition to add OnnxFloatToFloat16 pass is met."""
624637
precision = Precision(self.args.precision)
625-
return precision == Precision.FP16
638+
return precision == Precision.FP16 and not self.enable_model_builder
626639

627640
def _get_onnx_float_to_float16_pass_config(self) -> dict[str, Any]:
628641
"""Return pass dictionary for OnnxFloatToFloat16 pass."""

test/cli/test_cli.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,8 @@ def test_optimize_cli_pass_list(mock_repo_exists, mock_run, tmp_path):
493493
# setup
494494
output_dir = "output_dir"
495495

496+
# Each entry: [command, args, expected_passes, expected_device, expected_ep]
497+
# expected_device is None when --device is not specified (olive infers it at runtime)
496498
test_list = [
497499
[
498500
"optimize",
@@ -504,6 +506,8 @@ def test_optimize_cli_pass_list(mock_repo_exists, mock_run, tmp_path):
504506
"QuaRot, Gptq, CaptureSplitInfo, ModelBuilder, MatMulNBitsToQDQ, GraphSurgeries, "
505507
"OnnxStaticQuantization, SplitModel, StaticLLM"
506508
),
509+
None,
510+
"QNNExecutionProvider",
507511
],
508512
[
509513
"optimize",
@@ -515,16 +519,22 @@ def test_optimize_cli_pass_list(mock_repo_exists, mock_run, tmp_path):
515519
"QuaRot, Gptq, CaptureSplitInfo, ModelBuilder, MatMulNBitsToQDQ, GraphSurgeries, "
516520
"OnnxStaticQuantization, VitisAIAddMetaData, SplitModel, StaticLLM"
517521
),
522+
None,
523+
"VitisAIExecutionProvider",
518524
],
519525
[
520526
"optimize",
521527
"--precision int4 --act_precision int16 --provider OpenVINOExecutionProvider --device gpu",
522528
"OpenVINOOptimumConversion, OpenVINOIoUpdate, OpenVINOEncapsulation",
529+
"gpu",
530+
"OpenVINOExecutionProvider",
523531
],
524532
[
525533
"optimize",
526534
"-t text-classification --precision int8 --exporter torchscript_exporter",
527535
"OnnxConversion, OnnxPeepholeOptimizer, OrtTransformersOptimization, OnnxStaticQuantization",
536+
None,
537+
"CPUExecutionProvider",
528538
],
529539
[
530540
"optimize",
@@ -536,11 +546,22 @@ def test_optimize_cli_pass_list(mock_repo_exists, mock_run, tmp_path):
536546
"OnnxConversion, DynamicToFixedShape, OnnxPeepholeOptimizer, OrtTransformersOptimization, "
537547
"OnnxStaticQuantization, StaticLLM"
538548
),
549+
"npu",
550+
"QNNExecutionProvider",
539551
],
540552
[
541553
"optimize",
542554
"-t text-classification --precision fp16 --exporter torchscript_exporter --provider CUDAExecutionProvider",
543555
"OnnxConversion, OnnxPeepholeOptimizer, OrtTransformersOptimization, OnnxFloatToFloat16",
556+
None,
557+
"CUDAExecutionProvider",
558+
],
559+
[
560+
"optimize",
561+
"--precision fp16 --provider CUDAExecutionProvider",
562+
"ModelBuilder",
563+
None,
564+
"CUDAExecutionProvider",
544565
],
545566
[
546567
"optimize",
@@ -549,6 +570,8 @@ def test_optimize_cli_pass_list(mock_repo_exists, mock_run, tmp_path):
549570
" NvTensorRTRTXExecutionProvider --device gpu"
550571
),
551572
"OnnxConversion, OnnxPeepholeOptimizer, OnnxFloatToFloat16",
573+
"gpu",
574+
"NvTensorRTRTXExecutionProvider",
552575
],
553576
]
554577

@@ -577,6 +600,20 @@ def test_optimize_cli_pass_list(mock_repo_exists, mock_run, tmp_path):
577600

578601
assert pass_list == [item.strip() for item in t[2].split(",")]
579602

603+
# Verify system config has correct device and execution provider
604+
accelerator = data["systems"]["local_system"]["accelerators"][0]
605+
expected_device = t[3]
606+
expected_ep = t[4]
607+
if expected_device is None:
608+
assert "device" not in accelerator, f"Expected no device but got '{accelerator.get('device')}'"
609+
else:
610+
assert accelerator["device"] == expected_device, (
611+
f"Expected device '{expected_device}' but got '{accelerator.get('device')}'"
612+
)
613+
assert accelerator["execution_providers"] == [expected_ep], (
614+
f"Expected EP '{expected_ep}' but got '{accelerator['execution_providers']}'"
615+
)
616+
580617

581618
@patch("olive.workflows.run")
582619
@patch("huggingface_hub.repo_exists", return_value=True)

0 commit comments

Comments (0)