Skip to content

Commit 187a157

Browse files
authored
Update Whisper CUDA models to int4/int8 quantized ones (#5086)
1 parent d754e3f commit 187a157

10 files changed

Lines changed: 26 additions & 26 deletions

File tree

assets/models/foundrylocal/openai-whisper-base-cuda-gpu/model.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
path:
22
container_name: models
3-
container_path: foundrylocal/models/openai-whisper-base/onnx/cuda/v2
3+
container_path: foundrylocal/models/openai-whisper-base/onnx/cuda/v3
44
storage_name: foundrylocalassetdata
55
type: azureblob
66
publish:

assets/models/foundrylocal/openai-whisper-base-cuda-gpu/spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
22
name: openai-whisper-base-cuda-gpu
3-
version: 2
3+
version: 3
44
isArchived: true
55
path: ./
66
tags:
@@ -13,7 +13,7 @@ tags:
1313
task: automatic-speech-recognition
1414
maxOutputTokens: 2048
1515
alias: whisper-base
16-
directoryPath: v2
16+
directoryPath: v3
1717
promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
1818
contextLength: 448
1919
capabilities: ""
@@ -31,5 +31,5 @@ variantInfo:
3131
quantization: ['RTN']
3232
device: 'gpu'
3333
executionProvider: 'CUDAExecutionProvider'
34-
fileSizeBytes: 204272297
35-
vRamFootprintBytes: 204272297
34+
fileSizeBytes: 130003264
35+
vRamFootprintBytes: 130003264

assets/models/foundrylocal/openai-whisper-large-v3-turbo-cuda-gpu/model.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
path:
2-
container_name: whisper-models
3-
container_path: whisper-large-v3-turbo/cuda-fp16/v2
2+
container_name: models
3+
container_path: foundrylocal/models/openai-whisper-large-v3-turbo/onnx/cuda/v3
44
storage_name: foundrylocalassetdata
55
type: azureblob
66
publish:

assets/models/foundrylocal/openai-whisper-large-v3-turbo-cuda-gpu/spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
22
name: openai-whisper-large-v3-turbo-cuda-gpu
3-
version: 2
3+
version: 3
44
isArchived: true
55
path: ./
66
tags:
@@ -13,7 +13,7 @@ tags:
1313
task: automatic-speech-recognition
1414
maxOutputTokens: 2048
1515
alias: whisper-large-v3-turbo
16-
directoryPath: v2
16+
directoryPath: v3
1717
promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
1818
contextLength: 448
1919
capabilities: ""
@@ -31,5 +31,5 @@ variantInfo:
3131
quantization: ['RTN']
3232
device: 'gpu'
3333
executionProvider: 'CUDAExecutionProvider'
34-
fileSizeBytes: 9438190632
35-
vRamFootprintBytes: 9438528266
34+
fileSizeBytes: 842333753
35+
vRamFootprintBytes: 842333753

assets/models/foundrylocal/openai-whisper-medium-cuda-gpu/model.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
path:
22
container_name: models
3-
container_path: foundrylocal/models/openai-whisper-medium/onnx/cuda/v2
3+
container_path: foundrylocal/models/openai-whisper-medium/onnx/cuda/v3
44
storage_name: foundrylocalassetdata
55
type: azureblob
66
publish:

assets/models/foundrylocal/openai-whisper-medium-cuda-gpu/spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
22
name: openai-whisper-medium-cuda-gpu
3-
version: 2
3+
version: 3
44
isArchived: true
55
path: ./
66
tags:
@@ -13,7 +13,7 @@ tags:
1313
task: automatic-speech-recognition
1414
maxOutputTokens: 2048
1515
alias: whisper-medium
16-
directoryPath: v2
16+
directoryPath: v3
1717
promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
1818
contextLength: 448
1919
capabilities: ""
@@ -31,5 +31,5 @@ variantInfo:
3131
quantization: ['RTN']
3232
device: 'gpu'
3333
executionProvider: 'CUDAExecutionProvider'
34-
fileSizeBytes: 1640305824
35-
vRamFootprintBytes: 1640305824
34+
fileSizeBytes: 670344241
35+
vRamFootprintBytes: 670344241

assets/models/foundrylocal/openai-whisper-small-cuda-gpu/model.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
path:
22
container_name: models
3-
container_path: foundrylocal/models/openai-whisper-small/onnx/cuda/v2
3+
container_path: foundrylocal/models/openai-whisper-small/onnx/cuda/v3
44
storage_name: foundrylocalassetdata
55
type: azureblob
66
publish:

assets/models/foundrylocal/openai-whisper-small-cuda-gpu/spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
22
name: openai-whisper-small-cuda-gpu
3-
version: 2
3+
version: 3
44
isArchived: true
55
path: ./
66
tags:
@@ -13,7 +13,7 @@ tags:
1313
task: automatic-speech-recognition
1414
maxOutputTokens: 2048
1515
alias: whisper-small
16-
directoryPath: v2
16+
directoryPath: v3
1717
promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
1818
contextLength: 448
1919
capabilities: ""
@@ -31,5 +31,5 @@ variantInfo:
3131
quantization: ['RTN']
3232
device: 'gpu'
3333
executionProvider: 'CUDAExecutionProvider'
34-
fileSizeBytes: 569232292
35-
vRamFootprintBytes: 572890964
34+
fileSizeBytes: 316267734
35+
vRamFootprintBytes: 316267734

assets/models/foundrylocal/openai-whisper-tiny-cuda-gpu/model.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
path:
22
container_name: models
3-
container_path: foundrylocal/models/openai-whisper-tiny/onnx/cuda/v2
3+
container_path: foundrylocal/models/openai-whisper-tiny/onnx/cuda/v3
44
storage_name: foundrylocalassetdata
55
type: azureblob
66
publish:

assets/models/foundrylocal/openai-whisper-tiny-cuda-gpu/spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
22
name: openai-whisper-tiny-cuda-gpu
3-
version: 2
3+
version: 3
44
isArchived: true
55
path: ./
66
tags:
@@ -13,7 +13,7 @@ tags:
1313
task: automatic-speech-recognition
1414
maxOutputTokens: 2048
1515
alias: whisper-tiny
16-
directoryPath: v2
16+
directoryPath: v3
1717
promptTemplate: "{\"prompt\": \"<|startoftranscript|> <|en|> <|transcribe|> <|notimestamps|>\"}"
1818
contextLength: 448
1919
capabilities: ""
@@ -31,5 +31,5 @@ variantInfo:
3131
quantization: ['RTN']
3232
device: 'gpu'
3333
executionProvider: 'CUDAExecutionProvider'
34-
fileSizeBytes: 121285804
35-
vRamFootprintBytes: 121285804
34+
fileSizeBytes: 89899070
35+
vRamFootprintBytes: 89899070

0 commit comments

Comments
 (0)