Update Whisper model to a quantized version (#5013)

hanbitmyths · kunal-vaishnavi · web-flow · commit cc79c5abcea7 · 2026-05-09T13:48:51.000-07:00
Co-authored-by: kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
diff --git a/assets/models/foundrylocal/openai-whisper-base-generic-cpu/model.yaml b/assets/models/foundrylocal/openai-whisper-base-generic-cpu/model.yaml
@@ -1,7 +1,7 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/openai-whisper-base/onnx/cpu_and_mobile/v2
-  storage_name: foundrylocalmodels
+  container_path: foundrylocal/models/openai-whisper-base/onnx/cpu_and_mobile/v3
+  storage_name: foundrylocalassetdata
   type: azureblob
 publish:
   description: description.md
diff --git a/assets/models/foundrylocal/openai-whisper-base-generic-cpu/spec.yaml b/assets/models/foundrylocal/openai-whisper-base-generic-cpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: openai-whisper-base-generic-cpu
-version: 2
+version: 3
 path: ./
 tags:
   foundryLocal: ""
@@ -12,7 +12,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: whisper-base
-  directoryPath: v2
+  directoryPath: v3
   promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
   contextLength: 448
   capabilities: ""
@@ -30,5 +30,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 402590167
-    vRamFootprintBytes: 402590167
+    fileSizeBytes: 215285518
+    vRamFootprintBytes: 215285518
diff --git a/assets/models/foundrylocal/openai-whisper-large-v3-turbo-generic-cpu/model.yaml b/assets/models/foundrylocal/openai-whisper-large-v3-turbo-generic-cpu/model.yaml
@@ -1,7 +1,7 @@
 path:
-  container_name: whisper-models
-  container_path: whisper-large-v3-turbo/cpu-fp32/v2
-  storage_name: foundrylocalmodels
+  container_name: models
+  container_path: foundrylocal/models/openai-whisper-large-v3-turbo/onnx/cpu_and_mobile/v3
+  storage_name: foundrylocalassetdata
   type: azureblob
 publish:
   description: description.md
diff --git a/assets/models/foundrylocal/openai-whisper-large-v3-turbo-generic-cpu/spec.yaml b/assets/models/foundrylocal/openai-whisper-large-v3-turbo-generic-cpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: openai-whisper-large-v3-turbo-generic-cpu
-version: 2
+version: 3
 path: ./
 tags:
   foundryLocal: ""
@@ -12,7 +12,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: whisper-large-v3-turbo
-  directoryPath: v2
+  directoryPath: v3
   promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
   contextLength: 448
   capabilities: ""
@@ -30,5 +30,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 9438190632
-    vRamFootprintBytes: 9438528266
+    fileSizeBytes: 1678544910
+    vRamFootprintBytes: 1678544910
diff --git a/assets/models/foundrylocal/openai-whisper-medium-generic-cpu/model.yaml b/assets/models/foundrylocal/openai-whisper-medium-generic-cpu/model.yaml
@@ -1,7 +1,7 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/openai-whisper-medium/onnx/cpu_and_mobile/v2
-  storage_name: foundrylocalmodels
+  container_path: foundrylocal/models/openai-whisper-medium/onnx/cpu_and_mobile/v3
+  storage_name: foundrylocalassetdata
   type: azureblob
 publish:
   description: description.md
diff --git a/assets/models/foundrylocal/openai-whisper-medium-generic-cpu/spec.yaml b/assets/models/foundrylocal/openai-whisper-medium-generic-cpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: openai-whisper-medium-generic-cpu
-version: 2
+version: 3
 path: ./
 tags:
   foundryLocal: ""
@@ -12,7 +12,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: whisper-medium
-  directoryPath: v2
+  directoryPath: v3
   promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
   contextLength: 448
   capabilities: ""
@@ -30,5 +30,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 3274421068
-    vRamFootprintBytes: 3274421068
+    fileSizeBytes: 1334385110
+    vRamFootprintBytes: 1334385110
diff --git a/assets/models/foundrylocal/openai-whisper-small-generic-cpu/model.yaml b/assets/models/foundrylocal/openai-whisper-small-generic-cpu/model.yaml
@@ -1,7 +1,7 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/openai-whisper-small/onnx/cpu_and_mobile/v2
-  storage_name: foundrylocalmodels
+  container_path: foundrylocal/models/openai-whisper-small/onnx/cpu_and_mobile/v3
+  storage_name: foundrylocalassetdata
   type: azureblob
 publish:
   description: description.md
diff --git a/assets/models/foundrylocal/openai-whisper-small-generic-cpu/spec.yaml b/assets/models/foundrylocal/openai-whisper-small-generic-cpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: openai-whisper-small-generic-cpu
-version: 2
+version: 3
 path: ./
 tags:
   foundryLocal: ""
@@ -12,7 +12,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: whisper-small
-  directoryPath: v2
+  directoryPath: v3
   promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
   contextLength: 448
   capabilities: ""
@@ -30,5 +30,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 1132432089
-    vRamFootprintBytes: 1132432089
+    fileSizeBytes: 516028869
+    vRamFootprintBytes: 516028869
diff --git a/assets/models/foundrylocal/openai-whisper-tiny-generic-cpu/model.yaml b/assets/models/foundrylocal/openai-whisper-tiny-generic-cpu/model.yaml
@@ -1,7 +1,7 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/openai-whisper-tiny/onnx/cpu_and_mobile/v3
-  storage_name: foundrylocalmodels
+  container_path: foundrylocal/models/openai-whisper-tiny/onnx/cpu_and_mobile/v4
+  storage_name: foundrylocalassetdata
   type: azureblob
 publish:
   description: description.md
diff --git a/assets/models/foundrylocal/openai-whisper-tiny-generic-cpu/spec.yaml b/assets/models/foundrylocal/openai-whisper-tiny-generic-cpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: openai-whisper-tiny-generic-cpu
-version: 3
+version: 4
 path: ./
 tags:
   foundryLocal: ""
@@ -12,7 +12,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: whisper-tiny
-  directoryPath: v3
+  directoryPath: v4
   promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
   contextLength: 448
   capabilities: ""
@@ -30,5 +30,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 236654475
-    vRamFootprintBytes: 236654475
+    fileSizeBytes: 138106077
+    vRamFootprintBytes: 138106077