Azure · hanbitmyths · Jun 5, 2026 · Jun 5, 2026
@@ -1,6 +1,6 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cuda/v1
+  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cuda/v2
   storage_name: foundrylocalassetdata
   type: azureblob
 publish:

@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: nemotron-3.5-asr-streaming-0.6b-cuda-gpu
-version: 1
+version: 2
 isArchived: true
 path: ./
 tags:
@@ -13,7 +13,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: nemotron-3.5-asr-streaming-0.6b
-  directoryPath: v1
+  directoryPath: v2
   promptTemplate: ""
   capabilities: ""
   supportsReasoning: ""
@@ -31,5 +31,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'gpu'
     executionProvider: 'CUDAExecutionProvider'
-    fileSizeBytes: 793344637
-    vRamFootprintBytes: 793344637
+    fileSizeBytes: 793344782
+    vRamFootprintBytes: 793344782
@@ -1,6 +1,6 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cpu_and_mobile/v2
+  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cpu_and_mobile/v3
   storage_name: foundrylocalassetdata
   type: azureblob
 publish:

@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: nemotron-3.5-asr-streaming-0.6b-generic-cpu
-version: 2
+version: 3
 isArchived: true
 path: ./
 tags:
@@ -13,7 +13,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: nemotron-3.5-asr-streaming-0.6b
-  directoryPath: v2
+  directoryPath: v3
   promptTemplate: ""
   capabilities: ""
   supportsReasoning: ""
@@ -31,5 +31,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 793344112
-    vRamFootprintBytes: 793344112
+    fileSizeBytes: 793344362
+    vRamFootprintBytes: 793344362
@@ -0,0 +1,4 @@
+extra_config: model.yaml
+spec: spec.yaml
+type: model
+categories: ["Local"]
@@ -0,0 +1,13 @@
+This model is an optimized version of nemotron-speech-streaming-en-0.6b to enable local inference on GPUs. This model uses RTN quantization.
+
+# Model Description
+
+- **Developed by:** Microsoft
+- **Model type:** ONNX
+- **License:** MIT
+- **Model Description:** This is an optimized version of the nemotron-speech-streaming-en-0.6b model for local inference on GPUs.
+- **Disclaimer:** This model is only an optimization of the base model; any risk associated with the model is the responsibility of the user of the model. Please verify and test for your scenarios. There may be a slight difference in output from the base model with the optimizations applied. Note that the optimizations applied are distinct from fine-tuning and thus do not alter the intended uses or capabilities of the model.
+
+# Base Model Information
+
+See Hugging Face model [nemotron-speech-streaming-en-0.6b](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b) for details.
@@ -0,0 +1,8 @@
+path:
+  container_name: models
+  container_path: foundrylocal/models/nemotron-speech-streaming-en-0.6b/onnx/cuda/v1
+  storage_name: foundrylocalassetdata
+  type: azureblob
+publish:
+  description: description.md
+  type: custom_model
@@ -0,0 +1,35 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: nemotron-speech-streaming-en-0.6b-cuda-gpu
+version: 1
+isArchived: true
+path: ./
+tags:
+  foundryLocal: ""
+  license: "MIT"
+  licenseDescription: "This model is provided under the License Terms available at <https://github.com/microsoft/Foundry-Local/blob/main/licenses/nemotron-speech-streaming.md>."
+  author: Microsoft
+  inputModalities: "audio"
+  outputModalities: "text"
+  task: automatic-speech-recognition
+  maxOutputTokens: 2048
+  alias: nemotron-speech-streaming-en-0.6b
+  directoryPath: v1
+  promptTemplate: ""
+  capabilities: ""
+  supportsReasoning: ""
+  reasoningStart: ""
+  reasoningEnd: ""
+  contextLength: 0
+  minFLVersion: "1.1.0"
+  disable-maap: "true"
+type: custom_model
+variantInfo:
+  parents:
+  - assetId: azureml://registries/azureml/models/nemotron-speech-streaming-en-0.6b/versions/1
+  variantMetadata:
+    modelType: 'ONNX'
+    quantization: ['RTN']
+    device: 'gpu'
+    executionProvider: 'CUDAExecutionProvider'
+    fileSizeBytes: 730746955
+    vRamFootprintBytes: 730746955
@@ -0,0 +1,4 @@
+extra_config: model.yaml
+spec: spec.yaml
+type: model
+categories: ["Local"]
@@ -0,0 +1,13 @@
+This model is a fine-tuned and optimized derivative of nemotron-speech-streaming-en-0.6b, adapted for Spanish speech recognition. The model is optimized for local inference on GPUs.
+
+# Model Description
+
+- **Developed by:** Microsoft
+- **Model type:** ONNX
+- **License:** MIT
+- **Model Description:** This model is derived from nemotron-speech-streaming-en-0.6b and has been fine-tuned for Spanish speech recognition. It has been converted and optimized for efficient local inference on GPUs.
+- **Disclaimer:** This model is a fine-tuned and optimized derivative of the base model, adapted for Spanish speech recognition. Due to this adaptation, its capabilities differ substantially from the original English model and are specialized for Spanish-language use cases. The model may not perform as expected on other languages or tasks. Users are responsible for evaluating the model in their specific application context.
+
+# Base Model Information
+
+See Hugging Face model [nemotron-speech-streaming-en-0.6b](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b) for details.
@@ -0,0 +1,8 @@
+path:
+  container_name: models
+  container_path: foundrylocal/models/nemotron-speech-streaming-es-0.6b-ft/onnx/cuda/v1
+  storage_name: foundrylocalassetdata
+  type: azureblob
+publish:
+  description: description.md
+  type: custom_model
@@ -0,0 +1,35 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: nemotron-speech-streaming-es-0.6b-ft-cuda-gpu
+version: 1
+isArchived: true
+path: ./
+tags:
+  foundryLocal: ""
+  license: "MIT"
+  licenseDescription: "This model is provided under the License Terms available at <https://github.com/microsoft/Foundry-Local/blob/main/licenses/nemotron-speech-streaming.md>."
+  author: Microsoft
+  inputModalities: "audio"
+  outputModalities: "text"
+  task: automatic-speech-recognition
+  maxOutputTokens: 2048
+  alias: nemotron-speech-streaming-es-0.6b
+  directoryPath: v1
+  promptTemplate: ""
+  capabilities: ""
+  supportsReasoning: ""
+  reasoningStart: ""
+  reasoningEnd: ""
+  contextLength: 0
+  minFLVersion: "1.1.0"
+  disable-maap: "true"
+type: custom_model
+variantInfo:
+  parents:
+  - assetId: azureml://registries/azureml/models/nemotron-speech-streaming-en-0.6b/versions/1
+  variantMetadata:
+    modelType: 'ONNX'
+    quantization: ['RTN']
+    device: 'gpu'
+    executionProvider: 'CUDAExecutionProvider'
+    fileSizeBytes: 764952910
+    vRamFootprintBytes: 764952910