diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml
index 9069228efd..105285a7a8 100644
--- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml
+++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/model.yaml
@@ -1,6 +1,6 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cuda/v1
+  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cuda/v2
   storage_name: foundrylocalassetdata
   type: azureblob
 publish:
diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml
index b1e969e83e..1943c9dd7e 100644
--- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml
+++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-cuda-gpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: nemotron-3.5-asr-streaming-0.6b-cuda-gpu
-version: 1
+version: 2
 isArchived: true
 path: ./
 tags:
@@ -13,7 +13,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: nemotron-3.5-asr-streaming-0.6b
-  directoryPath: v1
+  directoryPath: v2
   promptTemplate: ""
   capabilities: ""
   supportsReasoning: ""
@@ -31,5 +31,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'gpu'
     executionProvider: 'CUDAExecutionProvider'
-    fileSizeBytes: 793344637
-    vRamFootprintBytes: 793344637
+    fileSizeBytes: 793344782
+    vRamFootprintBytes: 793344782
diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml
index c165a362cb..88704bb607 100644
--- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml
+++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/model.yaml
@@ -1,6 +1,6 @@
 path:
   container_name: models
-  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cpu_and_mobile/v2
+  container_path: foundrylocal/models/nemotron-3.5-asr-streaming-0.6b/onnx/cpu_and_mobile/v3
   storage_name: foundrylocalassetdata
   type: azureblob
 publish:
diff --git a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml
index 9bd53d0356..0b3fa85b31 100644
--- a/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml
+++ b/assets/models/foundrylocal/nemotron-3.5-asr-streaming-0.6b-generic-cpu/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
 name: nemotron-3.5-asr-streaming-0.6b-generic-cpu
-version: 2
+version: 3
 isArchived: true
 path: ./
 tags:
@@ -13,7 +13,7 @@ tags:
   task: automatic-speech-recognition
   maxOutputTokens: 2048
   alias: nemotron-3.5-asr-streaming-0.6b
-  directoryPath: v2
+  directoryPath: v3
   promptTemplate: ""
   capabilities: ""
   supportsReasoning: ""
@@ -31,5 +31,5 @@ variantInfo:
     quantization: ['RTN']
     device: 'cpu'
     executionProvider: 'CPUExecutionProvider'
-    fileSizeBytes: 793344112
-    vRamFootprintBytes: 793344112
+    fileSizeBytes: 793344362
+    vRamFootprintBytes: 793344362
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/asset.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/asset.yaml
new file mode 100644
index 0000000000..eb27bbc7f8
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/asset.yaml
@@ -0,0 +1,4 @@
+extra_config: model.yaml
+spec: spec.yaml
+type: model
+categories: ["Local"]
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/description.md b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/description.md
new file mode 100644
index 0000000000..d2cdbec812
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/description.md
@@ -0,0 +1,13 @@
+This model is an optimized version of nemotron-speech-streaming-en-0.6b to enable local inference on GPUs. This model uses RTN quantization.
+
+# Model Description
+
+- **Developed by:** Microsoft
+- **Model type:** ONNX
+- **License:** MIT
+- **Model Description:** This is an optimized version of the nemotron-speech-streaming-en-0.6b model for local inference on GPUs.
+- **Disclaimer:** The model is only an optimization of the base model; any risk associated with the model is the responsibility of the user of the model. Please verify and test for your scenarios. There may be a slight difference in output from the base model with the optimizations applied. Note that the optimizations applied are distinct from fine-tuning and thus do not alter the intended uses or capabilities of the model.
+
+# Base Model Information
+
+See Hugging Face model [nemotron-speech-streaming-en-0.6b](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b) for details.
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/model.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/model.yaml
new file mode 100644
index 0000000000..418d395089
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/model.yaml
@@ -0,0 +1,8 @@
+path:
+  container_name: models
+  container_path: foundrylocal/models/nemotron-speech-streaming-en-0.6b/onnx/cuda/v1
+  storage_name: foundrylocalassetdata
+  type: azureblob
+publish:
+  description: description.md
+  type: custom_model
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/spec.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/spec.yaml
new file mode 100644
index 0000000000..69ce0bb19f
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-en-0.6b-cuda-gpu/spec.yaml
@@ -0,0 +1,35 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: nemotron-speech-streaming-en-0.6b-cuda-gpu
+version: 1
+isArchived: true
+path: ./
+tags:
+  foundryLocal: ""
+  license: "MIT"
+  licenseDescription: "This model is provided under the License Terms available at <https://github.com/microsoft/Foundry-Local/blob/main/licenses/nemotron-speech-streaming.md>."
+  author: Microsoft
+  inputModalities: "audio"
+  outputModalities: "text"
+  task: automatic-speech-recognition
+  maxOutputTokens: 2048
+  alias: nemotron-speech-streaming-en-0.6b
+  directoryPath: v1
+  promptTemplate: ""
+  capabilities: ""
+  supportsReasoning: ""
+  reasoningStart: ""
+  reasoningEnd: ""
+  contextLength: 0
+  minFLVersion: "1.1.0"
+  disable-maap: "true"
+type: custom_model
+variantInfo:
+  parents:
+  - assetId: azureml://registries/azureml/models/nemotron-speech-streaming-en-0.6b/versions/1
+  variantMetadata:
+    modelType: 'ONNX'
+    quantization: ['RTN']
+    device: 'gpu'
+    executionProvider: 'CUDAExecutionProvider'
+    fileSizeBytes: 730746955
+    vRamFootprintBytes: 730746955
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/asset.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/asset.yaml
new file mode 100644
index 0000000000..eb27bbc7f8
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/asset.yaml
@@ -0,0 +1,4 @@
+extra_config: model.yaml
+spec: spec.yaml
+type: model
+categories: ["Local"]
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/description.md b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/description.md
new file mode 100644
index 0000000000..c11e8c4e25
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/description.md
@@ -0,0 +1,13 @@
+This model is a fine-tuned and optimized derivative of nemotron-speech-streaming-en-0.6b, adapted for Spanish speech recognition. The model is optimized for local inference on GPUs.
+
+# Model Description
+
+- **Developed by:** Microsoft
+- **Model type:** ONNX
+- **License:** MIT
+- **Model Description:** This model is derived from nemotron-speech-streaming-en-0.6b and has been fine-tuned for Spanish speech recognition. It has been converted and optimized for efficient local inference on GPUs.
+- **Disclaimer:** This model is a fine-tuned and optimized derivative of the base model, adapted for Spanish speech recognition. Due to this adaptation, its capabilities differ substantially from the original English model and are specialized for Spanish-language use cases. The model may not perform as expected on other languages or tasks. Users are responsible for evaluating the model in their specific application context.
+
+# Base Model Information
+
+See Hugging Face model [nemotron-speech-streaming-en-0.6b](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b) for details.
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/model.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/model.yaml
new file mode 100644
index 0000000000..ba0e1a769d
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/model.yaml
@@ -0,0 +1,8 @@
+path:
+  container_name: models
+  container_path: foundrylocal/models/nemotron-speech-streaming-es-0.6b-ft/onnx/cuda/v1
+  storage_name: foundrylocalassetdata
+  type: azureblob
+publish:
+  description: description.md
+  type: custom_model
diff --git a/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/spec.yaml b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/spec.yaml
new file mode 100644
index 0000000000..7e5fd3e9de
--- /dev/null
+++ b/assets/models/foundrylocal/nemotron-speech-streaming-es-0.6b-ft-cuda-gpu/spec.yaml
@@ -0,0 +1,35 @@
+$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
+name: nemotron-speech-streaming-es-0.6b-ft-cuda-gpu
+version: 1
+isArchived: true
+path: ./
+tags:
+  foundryLocal: ""
+  license: "MIT"
+  licenseDescription: "This model is provided under the License Terms available at <https://github.com/microsoft/Foundry-Local/blob/main/licenses/nemotron-speech-streaming.md>."
+  author: Microsoft
+  inputModalities: "audio"
+  outputModalities: "text"
+  task: automatic-speech-recognition
+  maxOutputTokens: 2048
+  alias: nemotron-speech-streaming-es-0.6b
+  directoryPath: v1
+  promptTemplate: ""
+  capabilities: ""
+  supportsReasoning: ""
+  reasoningStart: ""
+  reasoningEnd: ""
+  contextLength: 0
+  minFLVersion: "1.1.0"
+  disable-maap: "true"
+type: custom_model
+variantInfo:
+  parents:
+  - assetId: azureml://registries/azureml/models/nemotron-speech-streaming-en-0.6b/versions/1
+  variantMetadata:
+    modelType: 'ONNX'
+    quantization: ['RTN']
+    device: 'gpu'
+    executionProvider: 'CUDAExecutionProvider'
+    fileSizeBytes: 764952910
+    vRamFootprintBytes: 764952910