diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml index 9069228efd..105285a7a8 100644 --- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml +++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml @@ -1,6 +1,6 @@ path: container_name: models - container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cuda/v1 + container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cuda/v2 storage_name: foundrylocalassetdata type: azureblob publish: diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml index b1e969e83e..1943c9dd7e 100644 --- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml +++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json name: nemotron-3.5-asr-streaming-0.6b-cuda-gpu -version: 1 +version: 2 isArchived: true path: ./ tags: @@ -13,7 +13,7 @@ tags: task: automatic-speech-recognition maxOutputTokens: 2048 alias: nemotron-3.5-asr-streaming-0.6b - directoryPath: v1 + directoryPath: v2 promptTemplate: "" capabilities: "" supportsReasoning: "" @@ -31,5 +31,5 @@ variantInfo: quantization: ['RTN'] device: 'gpu' executionProvider: 'CUDAExecutionProvider' - fileSizeBytes: 793344637 - vRamFootprintBytes: 793344637 + fileSizeBytes: 793344782 + vRamFootprintBytes: 793344782 diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml index c165a362cb..88704bb607 100644 --- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml +++ 
b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml @@ -1,6 +1,6 @@ path: container_name: models - container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cpu_and_mobile/v2 + container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cpu_and_mobile/v3 storage_name: foundrylocalassetdata type: azureblob publish: diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml index 9bd53d0356..0b3fa85b31 100644 --- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml +++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json name: nemotron-3.5-asr-streaming-0.6b-generic-cpu -version: 2 +version: 3 isArchived: true path: ./ tags: @@ -13,7 +13,7 @@ tags: task: automatic-speech-recognition maxOutputTokens: 2048 alias: nemotron-3.5-asr-streaming-0.6b - directoryPath: v2 + directoryPath: v3 promptTemplate: "" capabilities: "" supportsReasoning: "" @@ -31,5 +31,5 @@ variantInfo: quantization: ['RTN'] device: 'cpu' executionProvider: 'CPUExecutionProvider' - fileSizeBytes: 793344112 - vRamFootprintBytes: 793344112 + fileSizeBytes: 793344362 + vRamFootprintBytes: 793344362 diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/asset.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/asset.yaml new file mode 100644 index 0000000000..eb27bbc7f8 --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/asset.yaml @@ -0,0 +1,4 @@ +extra_config: model.yaml +spec: spec.yaml +type: model +categories: ["Local"] diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/description.md 
b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/description.md new file mode 100644 index 0000000000..d2cdbec812 --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/description.md @@ -0,0 +1,13 @@ +This model is an optimized version of nemotron-speech-streaming-en-0.6b to enable local inference on GPUs. This model uses RTN quantization. + +# Model Description + +- **Developed by:** Microsoft +- **Model type:** ONNX +- **License:** MIT +- **Model Description:** This is an optimized version of the nemotron-speech-streaming-en-0.6b model for local inference on GPUs. +- **Disclaimer:** This model is only an optimization of the base model; any risk associated with the model is the responsibility of the user of the model. Please verify and test for your scenarios. There may be a slight difference in output from the base model with the optimizations applied. Note that optimizations applied are distinct from fine-tuning and thus do not alter the intended uses or capabilities of the model. + +# Base Model Information + +See Hugging Face model [nemotron-speech-streaming-en-0.6b](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b) for details. 
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/model.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/model.yaml new file mode 100644 index 0000000000..418d395089 --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/model.yaml @@ -0,0 +1,8 @@ +path: + container_name: models + container_path: foundrylocal/models/nemotron-speech-streaming-en-0.6b/onnx/cuda/v1 + storage_name: foundrylocalassetdata + type: azureblob +publish: + description: description.md + type: custom_model diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/spec.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/spec.yaml new file mode 100644 index 0000000000..69ce0bb19f --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/spec.yaml @@ -0,0 +1,35 @@ +$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json +name: nemotron-speech-streaming-en-0.6b-cuda-gpu +version: 1 +isArchived: true +path: ./ +tags: + foundryLocal: "" + license: "MIT" + licenseDescription: "This model is provided under the License Terms available at ." 
+ author: Microsoft + inputModalities: "audio" + outputModalities: "text" + task: automatic-speech-recognition + maxOutputTokens: 2048 + alias: nemotron-speech-streaming-en-0.6b + directoryPath: v1 + promptTemplate: "" + capabilities: "" + supportsReasoning: "" + reasoningStart: "" + reasoningEnd: "" + contextLength: 0 + minFLVersion: "1.1.0" + disable-maap: "true" +type: custom_model +variantInfo: + parents: + - assetId: azureml://registries/azureml/models/nemotron-speech-streaming-en-0.6b/versions/1 + variantMetadata: + modelType: 'ONNX' + quantization: ['RTN'] + device: 'gpu' + executionProvider: 'CUDAExecutionProvider' + fileSizeBytes: 730746955 + vRamFootprintBytes: 730746955 diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/asset.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/asset.yaml new file mode 100644 index 0000000000..eb27bbc7f8 --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/asset.yaml @@ -0,0 +1,4 @@ +extra_config: model.yaml +spec: spec.yaml +type: model +categories: ["Local"] diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/description.md b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/description.md new file mode 100644 index 0000000000..c11e8c4e25 --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/description.md @@ -0,0 +1,13 @@ +This model is a fine-tuned and optimized derivative of nemotron-speech-streaming-en-0.6b, adapted for Spanish speech recognition. The model is optimized for local inference on GPUs. + +# Model Description + +- **Developed by:** Microsoft +- **Model type:** ONNX +- **License:** MIT +- **Model Description:** This model is derived from nemotron-speech-streaming-en-0.6b and has been fine-tuned for Spanish speech recognition. It has been converted and optimized for efficient local inference on GPUs. 
+- **Disclaimer:** This model is a fine-tuned and optimized derivative of the base model, adapted for Spanish speech recognition. Due to this adaptation, its capabilities differ substantially from the original English model and are specialized for Spanish-language use cases. The model may not perform as expected on other languages or tasks. Users are responsible for evaluating the model in their specific application context. + +# Base Model Information + +See Hugging Face model [nemotron-speech-streaming-en-0.6b](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b) for details. diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/model.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/model.yaml new file mode 100644 index 0000000000..ba0e1a769d --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/model.yaml @@ -0,0 +1,8 @@ +path: + container_name: models + container_path: foundrylocal/models/nemotron-speech-streaming-es-0.6b-ft/onnx/cuda/v1 + storage_name: foundrylocalassetdata + type: azureblob +publish: + description: description.md + type: custom_model diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/spec.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/spec.yaml new file mode 100644 index 0000000000..7e5fd3e9de --- /dev/null +++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/spec.yaml @@ -0,0 +1,35 @@ +$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json +name: nemotron-speech-streaming-es-0.6b-ft-cuda-gpu +version: 1 +isArchived: true +path: ./ +tags: + foundryLocal: "" + license: "MIT" + licenseDescription: "This model is provided under the License Terms available at ." 
+ author: Microsoft + inputModalities: "audio" + outputModalities: "text" + task: automatic-speech-recognition + maxOutputTokens: 2048 + alias: nemotron-speech-streaming-es-0.6b + directoryPath: v1 + promptTemplate: "" + capabilities: "" + supportsReasoning: "" + reasoningStart: "" + reasoningEnd: "" + contextLength: 0 + minFLVersion: "1.1.0" + disable-maap: "true" +type: custom_model +variantInfo: + parents: + - assetId: azureml://registries/azureml/models/nemotron-speech-streaming-en-0.6b/versions/1 + variantMetadata: + modelType: 'ONNX' + quantization: ['RTN'] + device: 'gpu' + executionProvider: 'CUDAExecutionProvider' + fileSizeBytes: 764952910 + vRamFootprintBytes: 764952910