QNN Recipes: Remove extra_options (microsoft#149)

jambayk · web-flow · commit 0149ee888f47 · 2025-10-23T10:00:57.000-07:00
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/QNN/config.json b/Qwen-Qwen2.5-1.5B-Instruct/QNN/config.json
@@ -74,8 +74,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json
@@ -112,10 +112,7 @@
                 "GroupQueryAttention",
                 "MatMulNBits"
             ],
-            "save_as_external_data": true,
-            "extra_options": {
-                "CalibStridedMinMax": 4
-            }
+            "save_as_external_data": true
         },
         "sp": {
             "type": "SplitModel"
diff --git a/Qwen-Qwen2.5-7B-Instruct/QNN/config.json b/Qwen-Qwen2.5-7B-Instruct/QNN/config.json
@@ -74,8 +74,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json
@@ -112,10 +112,7 @@
                 "GroupQueryAttention",
                 "MatMulNBits"
             ],
-            "save_as_external_data": true,
-            "extra_options": {
-                "CalibStridedMinMax": 4
-            }
+            "save_as_external_data": true
         },
         "sp": {
             "type": "SplitModel"
diff --git a/meta-llama-Llama-3.1-8B-Instruct/QNN/config.json b/meta-llama-Llama-3.1-8B-Instruct/QNN/config.json
@@ -74,8 +74,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json
@@ -112,10 +112,7 @@
                 "GroupQueryAttention",
                 "MatMulNBits"
             ],
-            "save_as_external_data": true,
-            "extra_options": {
-                "CalibStridedMinMax": 4
-            }
+            "save_as_external_data": true
         },
         "sp": {
             "type": "SplitModel"
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json
@@ -112,10 +112,7 @@
                 "GroupQueryAttention",
                 "MatMulNBits"
             ],
-            "save_as_external_data": true,
-            "extra_options": {
-                "CalibStridedMinMax": 4
-            }
+            "save_as_external_data": true
         },
         "sp": {
             "type": "SplitModel"
diff --git a/microsoft-Phi-3-mini-128k-instruct/QNN/config.json b/microsoft-Phi-3-mini-128k-instruct/QNN/config.json
@@ -74,8 +74,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/microsoft-Phi-3-mini-4k-instruct/QNN/config.json b/microsoft-Phi-3-mini-4k-instruct/QNN/config.json
@@ -74,8 +74,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/microsoft-Phi-3.5-mini-instruct/QNN/config.json b/microsoft-Phi-3.5-mini-instruct/QNN/config.json
@@ -72,8 +72,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/microsoft-Phi-3.5-mini-instruct/QNN/config_fp16.json b/microsoft-Phi-3.5-mini-instruct/QNN/config_fp16.json
@@ -78,8 +78,7 @@
             "calibration_providers": [ "CUDAExecutionProvider" ],
             "quant_preprocess": true,
             "op_types_to_exclude": [ "Cast", "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
-            "save_as_external_data": true,
-            "extra_options": { "CalibStridedMinMax": 4 }
+            "save_as_external_data": true
         },
         "sp": { "type": "SplitModel" },
         "st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json
@@ -112,10 +112,7 @@
                 "GroupQueryAttention",
                 "MatMulNBits"
             ],
-            "save_as_external_data": true,
-            "extra_options": {
-                "CalibStridedMinMax": 4
-            }
+            "save_as_external_data": true
         },
         "sp": {
             "type": "SplitModel"