From 21f4d2bbe9eab11cb1cbb09de34a1a71ab48814c Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Tue, 25 Mar 2025 11:00:38 +0530
Subject: [PATCH 01/29] AML pipeline component for MaaP self serve validation

---
 .../publish_result_selfserve/asset.yaml       |  11 +
 .../publish_result_selfserve/spec.yaml        |  51 ++++
 .../run_inference_validation/asset.yaml       |   3 +
 .../run_inference_validation/spec.yaml        |  45 ++++
 .../validate_model_inference/asset.yaml       |   3 +
 .../validate_model_inference/spec.yaml        | 180 ++++++++++++++
 .../src/azureml/model/mgmt/config.py          |   1 +
 .../publish_validation_results_selfserve.py   | 175 +++++++++++++
 .../src/run_inference_validation.py           | 232 ++++++++++++++++++
 9 files changed, 701 insertions(+)
 create mode 100644 assets/training/model_management/components/publish_result_selfserve/asset.yaml
 create mode 100644 assets/training/model_management/components/publish_result_selfserve/spec.yaml
 create mode 100644 assets/training/model_management/components/run_inference_validation/asset.yaml
 create mode 100644 assets/training/model_management/components/run_inference_validation/spec.yaml
 create mode 100644 assets/training/model_management/components/validate_model_inference/asset.yaml
 create mode 100644 assets/training/model_management/components/validate_model_inference/spec.yaml
 create mode 100644 assets/training/model_management/src/publish_validation_results_selfserve.py
 create mode 100644 assets/training/model_management/src/run_inference_validation.py

diff --git a/assets/training/model_management/components/publish_result_selfserve/asset.yaml b/assets/training/model_management/components/publish_result_selfserve/asset.yaml
new file mode 100644
index 0000000000..9d4136ecd3
--- /dev/null
+++ b/assets/training/model_management/components/publish_result_selfserve/asset.yaml
@@ -0,0 +1,11 @@
+type: component
+spec: spec.yaml
+categories:
+  [
+    "CommonBench Baselining",
+    "Benchmarking",
+    "Run Benchmark",
+    "Publish Results",
+    "Self-Serve API",
+    "API Inferencing"
+  ]
diff --git a/assets/training/model_management/components/publish_result_selfserve/spec.yaml b/assets/training/model_management/components/publish_result_selfserve/spec.yaml
new file mode 100644
index 0000000000..23ad9f9d40
--- /dev/null
+++ b/assets/training/model_management/components/publish_result_selfserve/spec.yaml
@@ -0,0 +1,51 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+type: command
+is_deterministic: false  # publishing is a side effect; a cached run must never be reused
+
+name: publish_validation_results_selfserve
+version: 0.0.1
+display_name: Publish model validation results to Self-Serve
+description: |
+  This component publishes model validation results to the Self-Serve database.
+
+environment: azureml://registries/azureml/environments/model-management/versions/41
+
+inputs:
+  selfserve_base_url:
+    type: string
+    optional: false
+    default: "https://int.api.azureml-test.ms"
+    description: Base URL of the model publisher self-serve API
+  model_name:
+    type: string
+    optional: false
+    description: Name of the model (e.g., VerboGenie)
+  model_version:
+    type: integer
+    optional: false
+    description: Model onboarding version (e.g., 5)
+  publisher_name:
+    type: string
+    optional: false
+    description: Name of the model publisher (e.g., ContosoAI)
+  sku:
+    type: string
+    optional: false
+    default: "Standard_NC24ads_A100_v4"
+    description: Suggested SKU based on benchmark results
+  metrics_storage_uri:
+    type: uri_file
+    optional: false
+    mode: ro_mount
+    description: Path to the file containing the validation metrics csv storage path
+
+code: ../../src
+
+command: >-
+  python publish_validation_results_selfserve.py 
+  --selfserve-base-url ${{inputs.selfserve_base_url}} 
+  --model-name ${{inputs.model_name}} 
+  --model-version ${{inputs.model_version}} 
+  --publisher-name ${{inputs.publisher_name}} 
+  --sku ${{inputs.sku}} 
+  --metrics-storage-uri ${{inputs.metrics_storage_uri}}
\ No newline at end of file
diff --git a/assets/training/model_management/components/run_inference_validation/asset.yaml b/assets/training/model_management/components/run_inference_validation/asset.yaml
new file mode 100644
index 0000000000..c01772d398
--- /dev/null
+++ b/assets/training/model_management/components/run_inference_validation/asset.yaml
@@ -0,0 +1,3 @@
+type: component
+spec: spec.yaml
+categories: ["Model"]
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
new file mode 100644
index 0000000000..91c38775a6
--- /dev/null
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -0,0 +1,45 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+name: run_inference_validation
+version: 0.0.1
+type: command
+
+is_deterministic: True
+
+display_name: Run Inference Validation
+description: Compares the expected inference response with the actual response from model deployment.
+
+environment: azureml://registries/azureml/environments/model-management/versions/41
+
+code: ../../src
+command: >-
+  python run_inference_validation.py
+  --inference_payload ${{inputs.inference_payload}}
+  --expected_response ${{inputs.expected_response}}
+  --inference_response ${{inputs.inference_response}}
+  --validation_result ${{outputs.validation_result}}
+  --metrics_storage_uri ${{outputs.metrics_storage_uri}}
+
+inputs:
+  inference_payload:
+    type: uri_file
+    description: JSON input payload used for inference.
+
+  expected_response:
+    type: uri_file
+    description: JSON file containing the expected inference response.
+
+  inference_response:
+    type: uri_file
+    description: JSON file containing the actual inference response from the deployed model.
+
+outputs:
+  validation_result:
+    type: uri_file
+    description: JSON file containing the validation results.
+  metrics_storage_uri:
+    type: uri_file
+    description: JSON file containing the validation metrics csv storage path
+
+tags:
+    Preview: ""
+    Internal: ""
diff --git a/assets/training/model_management/components/validate_model_inference/asset.yaml b/assets/training/model_management/components/validate_model_inference/asset.yaml
new file mode 100644
index 0000000000..c01772d398
--- /dev/null
+++ b/assets/training/model_management/components/validate_model_inference/asset.yaml
@@ -0,0 +1,3 @@
+type: component
+spec: spec.yaml
+categories: ["Model"]
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
new file mode 100644
index 0000000000..e82eab4ab2
--- /dev/null
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -0,0 +1,180 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
+type: pipeline
+
+name: validate_model_inference
+display_name: Validate Model Inference
+description: deploy a model and validate it using a sample payload
+version: 0.0.1
+
+experiment_name: validate_model
+
+inputs:
+  compute:
+    type: string
+    optional: true
+    default: serverless
+    description: Compute for model deployment and inferencing
+  
+  instance_type:
+    type: string
+    optional: true
+    enum:
+      - Standard_DS1_v2
+      - Standard_DS2_v2
+      - Standard_DS3_v2
+      - Standard_DS4_v2
+      - Standard_DS5_v2
+      - Standard_F2s_v2
+      - Standard_F4s_v2
+      - Standard_F8s_v2
+      - Standard_F16s_v2
+      - Standard_F32s_v2
+      - Standard_F48s_v2
+      - Standard_F64s_v2
+      - Standard_F72s_v2
+      - Standard_FX24mds
+      - Standard_FX36mds
+      - Standard_FX48mds
+      - Standard_E2s_v3
+      - Standard_E4s_v3
+      - Standard_E8s_v3
+      - Standard_E16s_v3
+      - Standard_E32s_v3
+      - Standard_E48s_v3
+      - Standard_E64s_v3
+      - Standard_NC4as_T4_v3
+      - Standard_NC6s_v2
+      - Standard_NC6s_v3
+      - Standard_NC8as_T4_v3
+      - Standard_NC12s_v2
+      - Standard_NC12s_v3
+      - Standard_NC16as_T4_v3
+      - Standard_NC24s_v2
+      - Standard_NC24s_v3
+      - Standard_NC24rs_v3
+      - Standard_NC64as_T4_v3
+      - Standard_ND40rs_v2
+      - Standard_ND96asr_v4
+      - Standard_ND96amsr_A100_v4
+    default: Standard_NC6s_v3
+    description: Compute instance type to deploy model. Make sure that instance type is available and have enough quota available.
+
+  instance_count:
+    type: integer
+    optional: true
+    default: 1
+    description: Number of instances you want to use for deployment. Make sure instance type have enough quota available.
+
+  model_id:
+    type: string
+    optional: true 
+    description: |
+      Asset ID of the model registered in workspace/registry.
+      Registry - azureml://registries/<registry-name>/models/<model-name>/versions/<version>
+      Workspace - azureml:<model-name>:<version>
+
+  model_name:
+    type: string
+    optional: false
+    description: Name of the model to validate.
+
+  model_version:
+    type: integer
+    optional: false
+    description: Model onboarding version (e.g., 5)
+
+  publisher_name:
+    type: string
+    optional: false
+    description: Name of the model publisher (e.g., ContosoAI)
+
+  selfserve_base_url:
+    type: string
+    optional: true
+    default: "https://int.api.azureml-test.ms"
+    description: Base URL of the model publisher self-serve API
+
+  sku:
+    type: string
+    optional: true
+    default: "Standard_NC24ads_A100_v4"
+    description: SKU of the deployed model endpoint.
+
+  inference_payload:
+    type: uri_file
+    optional: true
+    description: JSON payload which would be used to validate deployment
+
+  endpoint_name:
+    type: string
+    optional: true
+    description: Name of the endpoint
+
+  deployment_name:
+    type: string
+    optional: true
+    default: default
+    description: Name of the deployment
+
+  expected_inference_response:
+    type: uri_file
+    description: JSON file containing the expected inference response.
+
+# Pipeline outputs
+outputs:
+  validation_result:
+    description: Output file containing the validation results.
+    type: uri_file
+
+jobs:
+  online_deployment_model:
+    type: command
+    component: azureml:deploy_model:0.0.12
+    compute: ${{parent.inputs.compute}}
+    inputs:
+      model_id: ${{parent.inputs.model_id}}
+      inference_payload: ${{parent.inputs.inference_payload}}
+      endpoint_name: ${{parent.inputs.endpoint_name}}
+      deployment_name: ${{parent.inputs.deployment_name}}
+      instance_type: ${{parent.inputs.instance_type}}
+      instance_count: ${{parent.inputs.instance_count}}
+    identity:
+      type: user_identity
+    outputs:
+      model_deployment_details:
+        type: uri_file
+      inference_response:
+        type: uri_file
+
+  run_inference_validation:
+    type: command
+    component: azureml:run_inference_validation:0.0.1
+    identity:
+      type: user_identity
+    inputs:
+      inference_payload: ${{parent.inputs.inference_payload}}
+      expected_response: ${{parent.inputs.expected_inference_response}}
+      inference_response: ${{parent.jobs.online_deployment_model.outputs.inference_response}}
+    outputs:
+      validation_result: ${{parent.outputs.validation_result}}
+
+  delete_endpoints:
+    type: command
+    component: azureml:delete_endpoint:0.0.7
+    inputs:
+      model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
+      endpoint_name: ${{parent.inputs.endpoint_name}}
+    identity:
+      type: user_identity
+    compute: ${{parent.inputs.compute}}
+
+  publish_results:
+    type: command
+    component: azureml:publish_validation_results_selfserve:0.0.1
+    inputs:
+      publisher_name: ${{parent.inputs.publisher_name}}
+      model_name: ${{parent.inputs.model_name}}
+      model_version: ${{parent.inputs.model_version}}
+      sku: ${{parent.inputs.instance_type}}
+      selfserve_base_url: ${{parent.inputs.selfserve_base_url}}
+      metrics_storage_uri: ${{parent.jobs.run_inference_validation.outputs.metrics_storage_uri}}
diff --git a/assets/training/model_management/src/azureml/model/mgmt/config.py b/assets/training/model_management/src/azureml/model/mgmt/config.py
index 2cd6b5f5d4..7de73aa0fe 100644
--- a/assets/training/model_management/src/azureml/model/mgmt/config.py
+++ b/assets/training/model_management/src/azureml/model/mgmt/config.py
@@ -47,6 +47,7 @@ class AppName:
     DOWNLOAD_MODEL = "download_model"
     CONVERT_MODEL_TO_MLFLOW = "convert_model_to_mlflow"
     VALIDATION_TRIGGER_IMPORT = "validation_trigger_import"
+    RUN_INFERENCE_VALIDATION = "run_inference_validation"
 
 
 class LoggerConfig:
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
new file mode 100644
index 0000000000..5b92e3bdff
--- /dev/null
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -0,0 +1,175 @@
+"""Update model onboarding version with CommonBench results."""
+
+import sys
+import os
+import uuid
+import json
+import logging
+import requests
+import argparse
+from datetime import datetime, timezone
+from azure.identity import ManagedIdentityCredential
+from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
+from datetime import datetime
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def read_results_from_file(file_path):
+    """Read the metrics results from the given file path."""
+    try:
+        with open(file_path, 'r') as f:
+            results_dict = json.load(f)
+        print(f"Results loaded from {file_path}")
+        return results_dict
+    except Exception as e:
+        print(f"Error reading from file: {e}")
+        return None
+
+
+def update_model_onboarding_version(
+    publisher_name,
+    model_name,
+    model_version,
+    selfserve_base_url,
+    sku,
+    metrics_storage_uri
+):
+    """Update model onboarding version with benchmark results.
+
+    Returns {"status_code": int} on a 2xx response; raises on credential, metrics-file or HTTP errors.
+    """
+    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    is_obo = False
+    try:
+        credential = AzureMLOnBehalfOfCredential()
+        token = credential.get_token("https://management.azure.com/.default").token
+        is_obo = True
+    except Exception as ex:
+        logger.warning(f"Failed to get OBO credentials - {ex}")
+
+    if not is_obo:
+        try:
+            logger.info("Fetching MSI credential")
+            msi_client_id = os.environ.get("DEFAULT_IDENTITY_CLIENT_ID")
+            credential = ManagedIdentityCredential(client_id=msi_client_id)
+            token = credential.get_token("https://management.azure.com/.default").token
+        except Exception as ex:
+            raise RuntimeError(f"Failed to get MSI credentials : {ex}") from ex
+
+    metrics_path_dict = read_results_from_file(metrics_storage_uri)
+    if metrics_path_dict is None:
+        raise ValueError(f"Could not read metrics paths from {metrics_storage_uri}")
+
+    run_id = str(uuid.uuid4())
+
+    validation_result = []
+
+    if metrics_path_dict.get("perf_bench_path") is not None:
+        validation_result.append({
+            "runId": run_id,
+            "type": "PERF_BENCHMARK",
+            "passed": True,
+            "message": "Baseline data is captured successfully",
+            "validationResultUrl": metrics_path_dict.get("perf_bench_path"),
+            "createdTime": current_time,
+            "status": "success",
+            "sku": sku
+        })
+
+    if metrics_path_dict.get("api_validation_path") is not None:
+        validation_result.append({
+            "runId": run_id,
+            "type": "API_VALIDATION",
+            "passed": True,
+            "message": "API validation passed successfully",
+            "validationResultUrl": metrics_path_dict.get("api_validation_path"),
+            "status": "success",
+            "createdTime": current_time,
+            "sku": sku
+        })
+
+    if metrics_path_dict.get("api_inference_path") is not None:
+        validation_result.append({
+            "runId": run_id,
+            "type": "API_VALIDATION",
+            "passed": True,
+            "message": "API inference passed successfully",
+            "validationResultUrl": metrics_path_dict.get("api_inference_path"),
+            "status": "success",
+            "createdTime": current_time,
+            "sku": sku
+        })
+
+    payload = {
+        "suggestedSKU": sku,
+        "status": "Validation",
+        "subStatus": "Validation_Successful",
+        "validationResult": validation_result
+    }
+
+    api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31"
+
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "User-Agent": "AzureML-ModelPublishing/1.0"
+    }
+
+    try:
+        # Headers are deliberately not logged: they carry the bearer token.
+        logger.info(f"Sending request to {api_url} \n, payload: {payload}")
+        response = requests.put(api_url, headers=headers, json=payload)
+        # The body may be empty or non-JSON, so log the raw text instead of parsing it.
+        logger.info(f"Response: {response.text}")
+
+        if response.ok:
+            logger.info(f"Successfully updated model onboarding version. Response: {response.status_code}")
+            return {"status_code": response.status_code}
+        else:
+            logger.error(f"Failed to update model onboarding version. Status code: {response.status_code}")
+            logger.error(f"Response content: {response.text}")
+            raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
+    except requests.RequestException as e:
+        logger.error(f"Request failed: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Update model onboarding version with CommonBench validation results")
+
+    parser.add_argument("--publisher-name", required=True,
+                        help="Name of the model publisher (e.g., ContosoAI)")
+    parser.add_argument("--model-name", required=True,
+                        help="Name of the model (e.g., VerboGenie)")
+    parser.add_argument("--model-version", required=True,
+                        help="Model onboarding version (e.g., 5)")
+    parser.add_argument("--selfserve-base-url", required=True,
+                        default="https://int.api.azureml-test.ms",
+                        help="Base URL of the model publisher self-serve API")
+    parser.add_argument("--metrics-storage-uri", required=True,
+                        help="URI to the storage where validation metrics are stored")
+    parser.add_argument("--sku", required=False,
+                        default="Standard_NC24ads_A100_v4",
+                        help="Suggested SKU based on benchmark results")
+
+    args = parser.parse_args()
+    logger.info(f"Arguments: {args}")
+
+    try:
+        result = update_model_onboarding_version(
+            args.publisher_name,
+            args.model_name,
+            args.model_version,
+            args.selfserve_base_url,
+            args.sku,
+            args.metrics_storage_uri
+        )
+        logger.info("Model onboarding version update completed successfully")
+    except Exception as e:
+        logger.error(f"Failed to update model onboarding version: {e}")
+        sys.exit(1)
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
new file mode 100644
index 0000000000..9129a19ade
--- /dev/null
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -0,0 +1,232 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Validate the structure of expected and actual inference response JSON files."""
+
+import json
+import argparse
+import os
+from azureml.core import Run
+from azureml.model.mgmt.utils.common_utils import get_mlclient
+from azureml.model.mgmt.config import AppName
+from azureml.model.mgmt.utils.logging_utils import custom_dimensions, get_logger
+
+
+logger = get_logger(__name__)
+custom_dimensions.app_name = AppName.RUN_INFERENCE_VALIDATION
+
+
+def load_json(file_path):
+    """Load JSON data from a file. If the loaded data is a string, try to parse it as JSON."""
+    try:
+        with open(file_path, "r") as f:
+            data = json.load(f)
+        # If data is a string, parse it as JSON.
+        if isinstance(data, str):
+            try:
+                data = json.loads(data)
+            except Exception as e:
+                logger.warning(f"Error parsing JSON from string in {file_path}: {e}")
+        return data
+    except Exception as e:
+        logger.warning(f"Error loading JSON file {file_path}: {e}")
+        return None
+
+
+def get_json_structure(data):
+    """
+    Recursively extract the structure of JSON (keys only).
+
+    For dictionaries, returns a dict of keys mapped to their structure.
+    For lists, returns a list with the structure of the first element.
+    For other types, returns None.
+    """
+    if isinstance(data, dict):
+        return {key: get_json_structure(value) for key, value in data.items()}
+    elif isinstance(data, list) and len(data) > 0:
+        # Assume all elements share the same structure and return the structure of the first element.
+        return [get_json_structure(data[0])]
+    else:
+        return None
+
+
+def compare_structures(inference_payload, expected_response, inference_response):
+    """
+    Compare JSON structures (keys only) of expected and actual.
+
+    Returns a dictionary with validation results.
+    """
+    expected_structure = get_json_structure(expected_response)
+    actual_structure = get_json_structure(inference_response)
+    logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}")
+
+    result = {
+        "inference_payload": inference_payload,
+        "inference_output": inference_response, 
+        "structure_match": expected_structure == actual_structure,
+        "expected_structure": expected_structure,
+        "actual_structure": actual_structure,
+        "differences": []
+    }
+
+    if not result["structure_match"]:
+        result["differences"] = [
+            {"expected": expected_structure, "actual": actual_structure}
+        ]
+    logger.info(f"result: {result}")
+    return result
+
+
+def save_validation_result(result, output_path):
+    """Save validation results to a JSON file."""
+    try:
+        with open(output_path, "w") as f:
+            json.dump(result, f, indent=4)
+        logger.info(f"Validation result saved to {output_path}")
+    except Exception as e:
+        logger.error(f"Error saving validation result: {e}")
+
+def replace_name_in_path(path_template, name_value):
+    """Replace the placeholder in the output path with the actual job name."""
+    return path_template.replace('${{name}}', name_value)
+
+def fetch_storage_uri():
+    """Return the storage URI of the output file from the AzureML pipeline run."""
+    try:
+        run = Run.get_context()
+        run_details = run.get_details()
+        output_data_path = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']['path']
+        
+        output_data_uri = replace_name_in_path(output_data_path, run.id)
+
+        logger.info(f"Output data URI: {output_data_uri}, output_data_path: {output_data_path}")
+
+        # Extract datastore name and path from the AzureML URI
+        datastore_name, path = extract_datastore_info(output_data_uri)
+        
+        # Construct the storage URI
+        storage_uri = get_storage_url(datastore_name)
+        full_storage_uri = f"{storage_uri}/{path}"
+        logger.info(f"Full storage URI: {full_storage_uri}")
+        
+        return full_storage_uri
+    except Exception as e:
+        logger.error(f"Error fetching storage URI: {e}")
+        return None
+
+
+def store_metrics_paths(metrics_file_path):
+    """Store the paths of the metrics CSV files in a JSON file."""
+    base_path = fetch_storage_uri()
+
+    logger.info(f"validation_result_path: {base_path}")
+    result_dict = {}
+    result_dict['api_inference_path'] = base_path
+    if result_dict:
+        write_results_to_file(result_dict, metrics_file_path)
+
+
+def fetch_path(output_dir):
+    """Return the relative path of the data from the output directory."""
+    try:
+        # Calculate relative path from the job folder
+        rel_path = os.path.relpath(output_dir, os.getcwd())
+        logger.info(f"rel_path: {rel_path}")
+        result_dict = {
+            'api_inference_path': rel_path
+        }
+        return result_dict
+    except Exception as e:
+        logger.error(f"Error calculating relative path: {e}")
+        return {}
+
+
+def write_results_to_file(results_dict, file_path):
+    """Write the results dictionary to a JSON file."""
+    try:
+        with open(file_path, 'w') as f:
+            json.dump(results_dict, f, indent=4)
+        logger.info(f"Results written to {file_path} in JSON format")
+        return True
+    except Exception as e:
+        logger.error(f"Error writing to file: {e}")
+        return False
+
+
+def get_storage_url(datastore_name):
+    """Retrieve the storage URL for the specified datastore."""
+    # Get MLClient instance
+    ml_client = get_mlclient()
+    logger.info(f"ml_client: {ml_client}")
+    datastore = ml_client.datastores.get(datastore_name)
+    storage_account_name = datastore.account_name
+    container_name = datastore.container_name
+    endpoint = datastore.endpoint
+
+    storage_uri = f"https://{storage_account_name}.blob.{endpoint}/{container_name}"
+    logger.info(f"storage_uri: {storage_uri}")
+
+    return storage_uri
+
+
+def extract_datastore_info(datastore_uri_path):
+    """Extract both datastore name and path from an Azure ML datastore URI path."""
+    # Check if it's a valid datastore URI
+    if not datastore_uri_path.startswith('azureml://datastores/'):
+        return None, None
+
+    parts = datastore_uri_path.split('/')
+
+    # The datastore name should be the part after 'datastores/'
+    if len(parts) >= 5 and parts[0] == 'azureml:' and parts[1] == '' and parts[2] == 'datastores' and 'paths' in parts:
+        datastore_name = parts[3]
+
+        # Find the index of 'paths' in the URI
+        paths_index = parts.index('paths')
+
+        # Join everything after 'paths/' to form the path
+        path = '/'.join(parts[(paths_index + 1):])
+
+        return datastore_name, path
+
+    return None, None
+
+
+def main():
+    """Compare expected and actual inference response structures."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--inference_payload", type=str, required=True,
+                        help="Path to the JSON payload file that was sent for inference.")
+    parser.add_argument("--expected_response", type=str, required=True,
+                        help="Path to the expected inference response JSON file.")
+    parser.add_argument("--inference_response", type=str, required=True,
+                        help="Path to the actual inference response JSON file.")
+    parser.add_argument("--validation_result", type=str, required=True,
+                        help="Path to save validation results.")
+    parser.add_argument("--metrics_storage_uri", type=str, required=True,
+                        help="Path to store the metrics.")
+
+    args = parser.parse_args()
+
+    # Load expected and actual responses.
+    inference_payload = load_json(args.inference_payload)
+    expected_response = load_json(args.expected_response)
+    inference_response = load_json(args.inference_response)
+    logger.info(f"expected response: {expected_response}, actual response: {inference_response}")
+
+    if expected_response is None or inference_response is None:
+        logger.warning("One or both JSON files could not be loaded.")
+        return
+
+    # Compare the JSON structures.
+    validation_result = compare_structures(inference_payload, expected_response, inference_response)
+
+    # Save the validation result.
+    save_validation_result(validation_result, args.validation_result)
+    logger.info(f"validation_result: {validation_result}, Validation result saved to {args.validation_result}")
+
+    store_metrics_paths(args.metrics_storage_uri)
+
+if __name__ == "__main__":
+    main()

From eecdceab45d9f37ddb98520ae7002beca7339590 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Fri, 28 Mar 2025 12:07:06 +0530
Subject: [PATCH 02/29] AML pipeline update to run the validation and publish
 result for maap self serve

---
 .../components/delete_endpoint/spec.yaml      |  2 +-
 .../common/components/deploy_model/spec.yaml  |  8 +++--
 assets/common/src/deploy.py                   | 16 +++++++++
 .../run_inference_validation/spec.yaml        |  3 +-
 .../validate_model_inference/spec.yaml        |  3 +-
 .../publish_validation_results_selfserve.py   | 15 ++++----
 .../src/run_inference_validation.py           | 34 +++++++++++--------
 7 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/assets/common/components/delete_endpoint/spec.yaml b/assets/common/components/delete_endpoint/spec.yaml
index 20d0f24c34..9e55975a1f 100644
--- a/assets/common/components/delete_endpoint/spec.yaml
+++ b/assets/common/components/delete_endpoint/spec.yaml
@@ -9,7 +9,7 @@ display_name: Delete Endpoint
 description:
   Deletes an endpoint resource.
 
-environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19
+environment: azureml://registries/azureml/environments/python-sdk-v2/versions/28
 
 code: ../../src
 command: >-
diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index db1670adeb..5add6652a6 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.11
+version: 0.0.12
 type: command
 
 is_deterministic: True
@@ -9,7 +9,7 @@ display_name: Deploy model
 description:
   Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached.
 
-environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19
+environment: azureml://registries/azureml/environments/python-sdk-v2/versions/28
 
 code: ../../src
 command: >-
@@ -35,6 +35,7 @@ command: >-
   $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]]
   $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]]
   --model_deployment_details ${{outputs.model_deployment_details}}
+  --inference_response ${{outputs.inference_response}}
 
 inputs:
   # Output of registering component
@@ -202,6 +203,9 @@ outputs:
   model_deployment_details:
     type: uri_file
     description: Json file to which deployment details will be written
+  inference_response:
+    type: uri_file
+    description: JSON file containing inference results
 
 tags:
     Preview: ""
diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index a4afa64493..8920ba1792 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -162,6 +162,11 @@ def parse_args():
         type=str,
         help="Json file to which deployment details will be written",
     )
+    parser.add_argument(
+        "--inference_response",
+        type=str,
+        help="Json file to save the inference response",
+    )
     # parse args
     args = parser.parse_args()
     logger.info(f"Args received {args}")
@@ -301,6 +306,7 @@ def main():
         args=args
     )
 
+    inference_result = None
     if args.inference_payload:
         print("Invoking inference with test payload ...")
         try:
@@ -311,6 +317,12 @@ def main():
             )
             print(f"Response:\n{response}")
             logger.info(f"Endpoint invoked successfully with response :{response}")
+            # Save inference response
+            inference_result = response
+            if args.inference_response:
+                with open(args.inference_response, "w") as f:
+                    json.dump(inference_result, f, indent=4)
+                logger.info("Saved inference response to output JSON file.")
         except Exception as e:
             raise AzureMLException._with_error(
                 AzureMLError.create(OnlineEndpointInvocationError, exception=e)
@@ -334,6 +346,10 @@ def main():
         outfile.write(json_object)
     logger.info("Saved deployment details in output json file.")
 
+    if response and args.inference_response:
+        with open(args.inference_response, "w") as outfile:
+            outfile.write(response)
+        logger.info("Saved inference response in output json file.")
 
 # run script
 if __name__ == "__main__":
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index 91c38775a6..4859e8d763 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -14,7 +14,7 @@ code: ../../src
 command: >-
   python run_inference_validation.py
   --inference_payload ${{inputs.inference_payload}}
-  --expected_response ${{inputs.expected_response}}
+  $[[--expected_response ${{inputs.expected_response}}]]
   --inference_response ${{inputs.inference_response}}
   --validation_result ${{outputs.validation_result}}
   --metrics_storage_uri ${{outputs.metrics_storage_uri}}
@@ -26,6 +26,7 @@ inputs:
 
   expected_response:
     type: uri_file
+    optional: true
     description: JSON file containing the expected inference response.
 
   inference_response:
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index e82eab4ab2..ebf3150e53 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -118,6 +118,7 @@ inputs:
 
   expected_inference_response:
     type: uri_file
+    optional: true
     description: JSON file containing the expected inference response.
 
 # Pipeline outputs
@@ -129,7 +130,7 @@ outputs:
 jobs:
   online_deployment_model:
     type: command
-    component: azureml:/deploy_model:0.0.12
+    component: azureml:deploy_model:0.0.12
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index 5b92e3bdff..cbdfe14621 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -4,17 +4,18 @@
 import os
 import uuid
 import json
-import logging
 import requests
 import argparse
 from datetime import datetime, timezone
 from azure.identity import ManagedIdentityCredential
 from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
 from datetime import datetime
+from azureml.model.mgmt.config import AppName
+from azureml.model.mgmt.utils.logging_utils import custom_dimensions, get_logger
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
+custom_dimensions.app_name = AppName.PUBHLISH_VALIDATION_RESULTS_SELF_SERVE
 
 
 def read_results_from_file(file_path):
@@ -121,7 +122,7 @@ def update_model_onboarding_version(
 
         response = requests.put(api_url, headers=headers, json=payload)
 
-        logger.info(f"Response: {response.json()}")
+        logger.info(f"Response: {response.text}")
 
         if response.ok:
             logger.info(
@@ -130,11 +131,11 @@ def update_model_onboarding_version(
         else:
             logger.error(
                 f"Failed to update model onboarding version. Status code: {response.status_code}")
-            logger.error(f"Response content: {response.json()}")
+            logger.error(f"Response content: {response.text}")
             raise Exception(
-                f"Request failed with status code {response.status_code}: {response.json()}")
+                f"Request failed with status code {response.status_code}: {response.text}")
     except requests.RequestException as e:
-        logging.error(f"Request failed: {e}")
+        logger.error(f"Request failed: {e}")
         raise
 
 
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 9129a19ade..0edbb65841 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -73,7 +73,7 @@ def compare_structures(inference_payload, expected_response, inference_response)
         result["differences"] = [
             {"expected": expected_structure, "actual": actual_structure}
         ]
-    logger.info(f"result: {result}")
+    logger.info(f"validation result: {result}")
     return result
 
 
@@ -99,8 +99,6 @@ def fetch_storage_uri():
         
         output_data_uri = replace_name_in_path(output_data_path, run.id)
 
-        logger.info(f"Output data URI: {output_data_uri}, output_data_path: {output_data_path}")
-
         # Extract datastore name and path from the AzureML URI
         datastore_name, path = extract_datastore_info(output_data_uri)
         
@@ -131,7 +129,7 @@ def fetch_path(output_dir):
     try:
         # Calculate relative path from the job folder
         rel_path = os.path.relpath(output_dir, os.getcwd())
-        logger.info(f"rel_path: {rel_path}")
+        logger.info(f"api inference validation relative path: {rel_path}")
         result_dict = {
             'api_inference_path': rel_path
         }
@@ -157,14 +155,13 @@ def get_storage_url(datastore_name):
     """Retrieve the storage URL for the specified datastore."""
     # Get MLClient instance
     ml_client = get_mlclient()
-    logger.info(f"ml_client: {ml_client}")
     datastore = ml_client.datastores.get(datastore_name)
     storage_account_name = datastore.account_name
     container_name = datastore.container_name
     endpoint = datastore.endpoint
 
     storage_uri = f"https://{storage_account_name}.blob.{endpoint}/{container_name}"
-    logger.info(f"storage_uri: {storage_uri}")
+    logger.info(f"validation result storage: {storage_uri}")
 
     return storage_uri
 
@@ -197,7 +194,7 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--inference_payload", type=str, required=True,
                         help="Path to the expected inference response JSON file.")
-    parser.add_argument("--expected_response", type=str, required=True,
+    parser.add_argument("--expected_response", type=str, required=False,
                         help="Path to the expected inference response JSON file.")
     parser.add_argument("--inference_response", type=str, required=True,
                         help="Path to the actual inference response JSON file.")
@@ -210,16 +207,25 @@ def main():
 
     # Load expected and actual responses.
     inference_payload = load_json(args.inference_payload)
-    expected_response = load_json(args.expected_response)
     inference_response = load_json(args.inference_response)
+    if args.expected_response is None:
+        expected_response = load_json(args.expected_response)
+    else:
+        expected_response = None
     logger.info(f"expected response: {expected_response}, actual response: {inference_response}")
 
-    if expected_response is None or inference_response is None:
-        logger.warning("One or both JSON files could not be loaded.")
-        return
-
-    # Compare the JSON structures.
-    validation_result = compare_structures(inference_payload, expected_response, inference_response)
+    if expected_response:
+        validation_result = compare_structures(inference_payload, expected_response, inference_response)
+    else:
+        validation_result = {
+            "inference_payload": inference_payload,
+            "inference_output": inference_response,
+            "structure_match": None,
+            "expected_structure": None,
+            "actual_structure": get_json_structure(inference_response),
+            "differences": []
+        }
+        logger.info("No expected response provided. Skipping structure comparison.")
 
     # Save the validation result.
     save_validation_result(validation_result, args.validation_result)

From d577a6d6dcd08efeed4d6799b2a03b19925a88e8 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Mon, 31 Mar 2025 10:44:03 +0530
Subject: [PATCH 03/29] Updated MaaP selfserve validation pipeline component

---
 .../training/model_management/src/azureml/model/mgmt/config.py   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/assets/training/model_management/src/azureml/model/mgmt/config.py b/assets/training/model_management/src/azureml/model/mgmt/config.py
index 7de73aa0fe..114e1dcc03 100644
--- a/assets/training/model_management/src/azureml/model/mgmt/config.py
+++ b/assets/training/model_management/src/azureml/model/mgmt/config.py
@@ -48,6 +48,7 @@ class AppName:
     CONVERT_MODEL_TO_MLFLOW = "convert_model_to_mlflow"
     VALIDATION_TRIGGER_IMPORT = "validation_trigger_import"
     RUN_INFERENCE_VALIDATION = "run_inference_validation"
+    PUBHLISH_VALIDATION_RESULTS_SELF_SERVE = "publish_validation_results_self_serve"
 
 
 class LoggerConfig:

From 7faef922a24a7d30e323e3a4c1046eda446a0351 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Mon, 31 Mar 2025 13:53:06 +0530
Subject: [PATCH 04/29] Updated the deploy aml component

---
 assets/common/src/deploy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 8920ba1792..753d4e9141 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -165,7 +165,7 @@ def parse_args():
     parser.add_argument(
         "--inference_response",
         type=str,
-        help="Json file to save the inference response",
+        help="Path to the inference response JSON file.",
     )
     # parse args
     args = parser.parse_args()

From ba4e70f4af4b0b2aa52d7bcc94726a156e335983 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Mon, 31 Mar 2025 13:59:37 +0530
Subject: [PATCH 05/29] renamed publish_result component

---
 .../asset.yaml                                                 | 0
 .../spec.yaml                                                  | 0
 .../src/publish_validation_results_selfserve.py                | 3 +++
 3 files changed, 3 insertions(+)
 rename assets/training/model_management/components/{publish_result_selfserve => publish_validation_results_selfserve}/asset.yaml (100%)
 rename assets/training/model_management/components/{publish_result_selfserve => publish_validation_results_selfserve}/spec.yaml (100%)

diff --git a/assets/training/model_management/components/publish_result_selfserve/asset.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/asset.yaml
similarity index 100%
rename from assets/training/model_management/components/publish_result_selfserve/asset.yaml
rename to assets/training/model_management/components/publish_validation_results_selfserve/asset.yaml
diff --git a/assets/training/model_management/components/publish_result_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
similarity index 100%
rename from assets/training/model_management/components/publish_result_selfserve/spec.yaml
rename to assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index cbdfe14621..a0d7290df5 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -1,3 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
 """Update model onboarding version with CommonBench results."""
 
 import sys

From d544040276ef092433ff92e0a772b10efbb6e6de Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Mon, 31 Mar 2025 20:40:27 +0530
Subject: [PATCH 06/29] updated the publish result component

---
 .../spec.yaml                                 |   1 +
 .../publish_validation_results_selfserve.py   | 125 ++++++++++--------
 2 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
index 23ad9f9d40..85079a786b 100644
--- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
+++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
@@ -47,5 +47,6 @@ command: >-
   --model-name ${{inputs.model_name}} 
   --model-version ${{inputs.model_version}} 
   --publisher-name ${{inputs.publisher_name}} 
+  --validation-id ${{inputs.validation_id}} 
   --sku ${{inputs.sku}} 
   --metrics-storage-uri ${{inputs.metrics_storage_uri}}
\ No newline at end of file
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index a0d7290df5..16c87350b8 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -32,78 +32,87 @@ def read_results_from_file(file_path):
         print(f"Error reading from file: {e}")
         return None
 
-
-def update_model_onboarding_version(
-    publisher_name,
-    model_name,
-    model_version,
-    selfserve_base_url,
-    sku,
-    metrics_storage_uri
-):
-    """Update model onboarding version with benchmark results."""
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-
+def get_auth_token():
+    """Generate auth token for Azure API."""
     is_obo = False
+    tokenUri = "https://management.azure.com/.default"
+    token = None
+
     try:
         credential = AzureMLOnBehalfOfCredential()
-        token = credential.get_token(
-            "https://management.azure.com/.default").token
+        token = credential.get_token(tokenUri).token
         is_obo = True
-    except Exception as ex:
-        logger.warning(f"Failed to get OBO credentials - {ex}")
+    except Exception:
+        logger.warning(
+            "Failed to get user credentials, fetching MSI credentials")
 
     if not is_obo:
         try:
-            logger.info("Fetching MSI credential")
             msi_client_id = os.environ.get("DEFAULT_IDENTITY_CLIENT_ID")
             credential = ManagedIdentityCredential(client_id=msi_client_id)
-            token = credential.get_token(
-                "https://management.azure.com/.default").token
+            token = credential.get_token(tokenUri).token
         except Exception as ex:
-            raise (f"Failed to get MSI credentials : {ex}")
+            raise Exception(f"Failed to get MSI credentials : {ex}")
 
-    metrics_path_dict = read_results_from_file(metrics_storage_uri)
+    return token
 
-    run_id = str(uuid.uuid4())
+
+def update_model_onboarding_version(
+    publisher_name,
+    model_name,
+    model_version,
+    sku,
+    validation_id,
+    selfserve_base_url,
+    metrics_storage_uri
+):
+    """Update model onboarding version with benchmark results."""
+    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    metrics_path_dict = read_results_from_file(metrics_storage_uri)
 
     validation_result = []
 
-    if metrics_path_dict.get("perf_bench_path") is not None:
-        validation_result.append({
-            "runId": run_id,
-            "type": "PERF_BENCHMARK",
-            "passed": True,
-            "message": "Baseline data is captured successfully",
-            "validationResultUrl": metrics_path_dict.get("perf_bench_path"),
-            "createdTime": current_time,
-            "status": "success",
-            "sku": sku
-        })
-
-    if metrics_path_dict.get("api_validation_path") is not None:
-        validation_result.append({
-            "runId": run_id,
-            "type": "API_VALIDATION",
-            "passed": True,
-            "message": "API validation passed successfully",
-            "validationResultUrl": metrics_path_dict.get("api_validation_path"),
-            "status": "success",
-            "createdTime": current_time,
-            "sku": sku
-        })
-
-    if metrics_path_dict.get("api_inference_path") is not None:
-        validation_result.append({
-            "runId": run_id,
-            "type": "API_VALIDATION",
-            "passed": True,
-            "message": "API inference passed successfully",
-            "validationResultUrl": metrics_path_dict.get("api_inference_path"),
-            "status": "success",
-            "createdTime": current_time,
-            "sku": sku
-        })
+    if validation_id is not None:
+        if metrics_path_dict.get("perf_bench_path") is not None:
+            validation_result.append({
+                "Id": validation_id,
+                "type": "PERF_BENCHMARK",
+                "passed": True,
+                "message": "Baseline data is captured successfully",
+                "validationResultUrl": metrics_path_dict.get("perf_bench_path"),
+                "createdTime": current_time,
+                "status": "success",
+                "sku": sku
+            })
+
+        if metrics_path_dict.get("api_validation_path") is not None:
+            validation_result.append({
+                "Id": validation_id,
+                "type": "API_VALIDATION",
+                "passed": True,
+                "message": "API validation passed successfully",
+                "validationResultUrl": metrics_path_dict.get("api_validation_path"),
+                "status": "success",
+                "createdTime": current_time,
+                "sku": sku
+            })
+
+        if metrics_path_dict.get("api_inference_path") is not None:
+            validation_result.append({
+                "Id": validation_id,
+                "type": "API_VALIDATION",
+                "passed": True,
+                "message": "API inference passed successfully",
+                "validationResultUrl": metrics_path_dict.get("api_inference_path"),
+                "status": "success",
+                "createdTime": current_time,
+                "sku": sku
+            })
+    else:
+        logger.error(
+            "Validation run ID is None, not updating validation results in self-serve")
+        sys.exit(1)
 
     payload = {
         "suggestedSKU": sku,
@@ -115,7 +124,7 @@ def update_model_onboarding_version(
     api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31"
 
     headers = {
-        "Authorization": f"Bearer {token}",
+        "Authorization": f"Bearer {get_auth_token()}",
         "Content-Type": "application/json",
         "User-Agent": "AzureML-ModelPublishing/1.0"
     }

From b0743b592d8b49b49078a61487b4bb5cc52e3693 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Tue, 1 Apr 2025 19:19:24 +0530
Subject: [PATCH 07/29] Updated publish validation result

---
 .../spec.yaml                                 |  4 ++
 .../validate_model_inference/spec.yaml        |  6 ++
 .../publish_validation_results_selfserve.py   | 59 +++++--------------
 3 files changed, 24 insertions(+), 45 deletions(-)

diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
index 85079a786b..531eb689a0 100644
--- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
+++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
@@ -33,6 +33,10 @@ inputs:
     optional: false
     default: "Standard_NC24ads_A100_v4"
     description: Suggested SKU based on benchmark results
+  validation_id:
+    type: string
+    optional: false
+    description: ID of the validation run (used for updating status in self-serve)
   metrics_storage_uri:
     type: uri_file
     optional: false
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index ebf3150e53..e461840015 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -116,6 +116,11 @@ inputs:
     default: default
     description: Name of the deployment
 
+  validation_id:
+    type: string
+    optional: true
+    description: ID of the validation run (used for updating status in self-serve)
+
   expected_inference_response:
     type: uri_file
     optional: true
@@ -177,5 +182,6 @@ jobs:
       model_name: ${{parent.inputs.model_name}}
       model_version: ${{parent.inputs.model_version}}
       sku: ${{parent.inputs.instance_type}}
+      validation_id: ${{parent.inputs.validation_id}}
       selfserve_base_url: ${{parent.inputs.selfserve_base_url}}
       metrics_storage_uri: ${{parent.jobs.run_inference_validation.outputs.metrics_storage_uri}}
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index 16c87350b8..c749b80705 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -12,7 +12,6 @@
 from datetime import datetime, timezone
 from azure.identity import ManagedIdentityCredential
 from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
-from datetime import datetime
 from azureml.model.mgmt.config import AppName
 from azureml.model.mgmt.utils.logging_utils import custom_dimensions, get_logger
 
@@ -71,57 +70,24 @@ def update_model_onboarding_version(
 
     metrics_path_dict = read_results_from_file(metrics_storage_uri)
 
-    validation_result = []
-
-    if validation_id is not None:
-        if metrics_path_dict.get("perf_bench_path") is not None:
-            validation_result.append({
-                "Id": validation_id,
-                "type": "PERF_BENCHMARK",
-                "passed": True,
-                "message": "Baseline data is captured successfully",
-                "validationResultUrl": metrics_path_dict.get("perf_bench_path"),
-                "createdTime": current_time,
-                "status": "success",
-                "sku": sku
-            })
-
-        if metrics_path_dict.get("api_validation_path") is not None:
-            validation_result.append({
-                "Id": validation_id,
-                "type": "API_VALIDATION",
-                "passed": True,
-                "message": "API validation passed successfully",
-                "validationResultUrl": metrics_path_dict.get("api_validation_path"),
-                "status": "success",
-                "createdTime": current_time,
-                "sku": sku
-            })
-
-        if metrics_path_dict.get("api_inference_path") is not None:
-            validation_result.append({
-                "Id": validation_id,
-                "type": "API_VALIDATION",
-                "passed": True,
-                "message": "API inference passed successfully",
-                "validationResultUrl": metrics_path_dict.get("api_inference_path"),
-                "status": "success",
-                "createdTime": current_time,
-                "sku": sku
-            })
+    validationResultUrl = None
+
+    if validation_id:
+        if metrics_path_dict.get("api_inference_path"):
+            validationResultUrl = metrics_path_dict.get("api_inference_path")
     else:
         logger.error(
             "Validation run ID is None, not updating validation results in self-serve")
         sys.exit(1)
 
     payload = {
-        "suggestedSKU": sku,
-        "status": "Validation",
-        "subStatus": "Validation_Successful",
-        "validationResult": validation_result
+        "passed": True,
+        "status": "Completed",
+        "message": "Validation Successful",
+        "validationResult": validationResultUrl
     }
 
-    api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31"
+    api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/versions/{model_version}/validations/{validation_id}/updateValidationResult?api-version=2024-12-31"
 
     headers = {
         "Authorization": f"Bearer {get_auth_token()}",
@@ -164,6 +130,8 @@ def update_model_onboarding_version(
     parser.add_argument("--selfserve-base-url", required=True,
                         default="https://int.api.azureml-test.ms",
                         help="Base URL of the model publisher self-serve API")
+    parser.add_argument("--validation-id", required=True,
+                        help="Run ID of the validation run")
     parser.add_argument("--metrics-storage-uri", required=True,
                         help="URI to the storage where validation metrics are stored")
     parser.add_argument("--sku", required=False,
@@ -178,8 +146,9 @@ def update_model_onboarding_version(
             args.publisher_name,
             args.model_name,
             args.model_version,
-            args.selfserve_base_url,
             args.sku,
+            args.validation_id,
+            args.selfserve_base_url,
             args.metrics_storage_uri
         )
         logger.info("Model onboarding version update completed successfully")

From aebe16a8519d680895ee9c65f2e35fdb12cef83e Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Wed, 2 Apr 2025 14:22:18 +0530
Subject: [PATCH 08/29] Updated validation result publish API

---
 .../publish_validation_results_selfserve.py   | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index c749b80705..adad260b9a 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -70,24 +70,33 @@ def update_model_onboarding_version(
 
     metrics_path_dict = read_results_from_file(metrics_storage_uri)
 
-    validationResultUrl = None
+    validation_result = []
 
     if validation_id:
-        if metrics_path_dict.get("api_inference_path"):
-            validationResultUrl = metrics_path_dict.get("api_inference_path")
+        if metrics_path_dict.get("api_inference_path", None):
+            validation_result.append({
+                "Id": validation_id,
+                "type": "API_VALIDATION",
+                "passed": True,
+                "message": "API inference passed successfully",
+                "validationResultUrl": metrics_path_dict.get("api_inference_path"),
+                "status": "success",
+                "createdTime": current_time,
+                "sku": sku
+            })
     else:
         logger.error(
-            "Validation run ID is None, not updating validation results in self-serve")
+            "Validation  ID is None, not updating validation results in self-serve")
         sys.exit(1)
 
     payload = {
         "passed": True,
         "status": "Completed",
         "message": "Validation Successful",
-        "validationResult": validationResultUrl
+        "validationResult": validation_result
     }
 
-    api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/versions/{model_version}/validations/{validation_id}/updateValidationResult?api-version=2024-12-31"
+    api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31"
 
     headers = {
         "Authorization": f"Bearer {get_auth_token()}",

From 380ca49e88a466d7367dd0ac351540f5ebc32684 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Tue, 8 Apr 2025 12:43:26 +0530
Subject: [PATCH 09/29] Updated the AML pipeline validation component

---
 .../common/components/deploy_model/spec.yaml  |  8 ++-
 assets/common/src/deploy.py                   | 55 ++++++++++++----
 .../run_inference_validation/spec.yaml        |  4 +-
 .../validate_model_inference/spec.yaml        | 14 ++---
 .../publish_validation_results_selfserve.py   |  2 +
 .../src/run_inference_validation.py           | 62 +++++++++++++------
 6 files changed, 104 insertions(+), 41 deletions(-)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index 5add6652a6..eea9f29b4a 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.12
+version: 0.0.12.10
 type: command
 
 is_deterministic: True
@@ -17,6 +17,7 @@ command: >-
   $[[--registration_details_folder ${{inputs.registration_details_folder}}]]
   $[[--model_id ${{inputs.model_id}}]]
   $[[--inference_payload ${{inputs.inference_payload}}]]
+  $[[--inference_payload_str ${{inputs.inference_payload_str}}]]
   $[[--endpoint_name ${{inputs.endpoint_name}}]]
   $[[--deployment_name ${{inputs.deployment_name}}]]
   $[[--instance_type ${{inputs.instance_type}}]]
@@ -57,6 +58,11 @@ inputs:
     optional: true
     description: JSON payload which would be used to validate deployment
 
+  inference_payload_str:
+    type: string
+    optional: true
+    description: Serialized JSON payload which would be used to validate deployment
+
   endpoint_name:
     type: string
     optional: true
diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 753d4e9141..65a097ae34 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -6,6 +6,7 @@
 import json
 import re
 import time
+import base64
 
 from azure.ai.ml.entities import (
     ManagedOnlineEndpoint,
@@ -60,6 +61,11 @@ def parse_args():
         type=Path,
         help="Json file with inference endpoint payload.",
     )
+    parser.add_argument(
+        "--inference_payload_str",
+        type=str,
+        help="Serialized JSON payload for inference.",
+    )
     parser.add_argument(
         "--endpoint_name",
         type=str,
@@ -306,23 +312,50 @@ def main():
         args=args
     )
 
-    inference_result = None
-    if args.inference_payload:
+    response = None
+    if args.inference_payload or args.inference_payload_str:
         print("Invoking inference with test payload ...")
         try:
-            response = ml_client.online_endpoints.invoke(
-                endpoint_name=endpoint_name,
-                deployment_name=deployment_name,
-                request_file=args.inference_payload,
-            )
-            print(f"Response:\n{response}")
-            logger.info(f"Endpoint invoked successfully with response :{response}")
+            start_time = time.time()
+            if args.inference_payload_str:
+                print(f"Inference payload string: {args.inference_payload_str}")
+                decoded_bytes = base64.b64decode(args.inference_payload_str)
+
+                # Convert bytes to string
+                decoded_str = decoded_bytes.decode('utf-8')
+                logger.info(f"Decoded string: {decoded_str}")
+
+                payload = json.loads(decoded_str)
+                logger.info(f"Payload:\n {payload}")
+
+                with open("payload.json", "w") as temp_file:
+                    json.dump(payload, temp_file)
+
+                response = ml_client.online_endpoints.invoke(
+                    endpoint_name=endpoint_name,
+                    deployment_name=deployment_name,
+                    request_file="payload.json",
+                )
+            elif args.inference_payload:
+                response = ml_client.online_endpoints.invoke(
+                    endpoint_name=endpoint_name,
+                    deployment_name=deployment_name,
+                    request_file=args.inference_payload,
+                )
+
+            end_time = time.time()
+            inference_time_ms = int((end_time - start_time) * 1000)
+
+            logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms and response: {response}")
             # Save inference response
-            inference_result = response
             if args.inference_response:
+                inference_result = {
+                    "response": response,
+                    "inference_time": inference_time_ms
+                }
                 with open(args.inference_response, "w") as f:
                     json.dump(inference_result, f, indent=4)
-                logger.info("Saved inference response to output JSON file.")
+                logger.info("Saved inference response and inference time to output JSON file.")
         except Exception as e:
             raise AzureMLException._with_error(
                 AzureMLError.create(OnlineEndpointInvocationError, exception=e)
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index 4859e8d763..adf4cd3c49 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: run_inference_validation
-version: 0.0.1
+version: 0.0.1.17
 type: command
 
 is_deterministic: True
@@ -21,7 +21,7 @@ command: >-
 
 inputs:
   inference_payload:
-    type: uri_file
+    type: string
     description: JSON input payload used for inference.
 
   expected_response:
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index e461840015..8423b3ce28 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -4,7 +4,7 @@ type: pipeline
 name: validate_model_inference
 display_name: Validate Model Inference
 description: deploy a model and validate it using a sample payload
-version: 0.0.1
+version: 0.0.1.17
 
 experiment_name: validate_model
 
@@ -101,7 +101,7 @@ inputs:
     description: SKU of the deployed model endpoint.
 
   inference_payload:
-    type: uri_file
+    type: string
     optional: true
     description: JSON payload which would be used to validate deployment
 
@@ -135,11 +135,11 @@ outputs:
 jobs:
   online_deployment_model:
     type: command
-    component: azureml:deploy_model:0.0.12
+    component: component: azureml:deploy_model:0.0.12.9
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
-      inference_payload: ${{parent.inputs.inference_payload}}
+      inference_payload_str: ${{parent.inputs.inference_payload}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
       deployment_name: ${{parent.inputs.deployment_name}}
       instance_type: ${{parent.inputs.instance_type}}
@@ -154,7 +154,7 @@ jobs:
 
   run_inference_validation:
     type: command
-    component: azureml:run_inference_validation:0.0.1
+    component: component: azureml:run_inference_validation:0.0.1.17
     identity:
       type: user_identity
     inputs:
@@ -166,7 +166,7 @@ jobs:
 
   delete_endpoints:
     type: command
-    component: azureml:delete_endpoint:0.0.7
+    component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1
     inputs:
       model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
@@ -176,7 +176,7 @@ jobs:
 
   publish_results:
     type: command
-    component: azureml:publish_validation_results_selfserve:0.0.1
+    component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.8
     inputs:
       publisher_name: ${{parent.inputs.publisher_name}}
       model_name: ${{parent.inputs.model_name}}
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index adad260b9a..b37c11b0e0 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -76,12 +76,14 @@ def update_model_onboarding_version(
         if metrics_path_dict.get("api_inference_path", None):
             validation_result.append({
                 "Id": validation_id,
+                "runId": validation_id,
                 "type": "API_VALIDATION",
                 "passed": True,
                 "message": "API inference passed successfully",
                 "validationResultUrl": metrics_path_dict.get("api_inference_path"),
                 "status": "success",
                 "createdTime": current_time,
+                "updatedTime": current_time,
                 "sku": sku
             })
     else:
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 0edbb65841..0c5f624985 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -33,6 +33,16 @@ def load_json(file_path):
         return None
 
 
+def load_json_from_string(json_string):
+    """Load JSON data from a string."""
+    try:
+        data = json.loads(json_string)
+        return data
+    except Exception as e:
+        logger.warning(f"Error parsing JSON from string: {e}")
+        return None
+
+
 def get_json_structure(data):
     """
     Recursively extract the structure of JSON (keys only).
@@ -50,7 +60,7 @@ def get_json_structure(data):
         return None
 
 
-def compare_structures(inference_payload, expected_response, inference_response):
+def compare_structures(inference_payload, expected_response, inference_response, success_status, inference_time):
     """
     Compare JSON structures (keys only) of expected and actual.
 
@@ -61,12 +71,13 @@ def compare_structures(inference_payload, expected_response, inference_response)
     logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}")
 
     result = {
-        "inference_payload": inference_payload,
-        "inference_output": inference_response, 
-        "structure_match": expected_structure == actual_structure,
-        "expected_structure": expected_structure,
-        "actual_structure": actual_structure,
-        "differences": []
+        "success": success_status,
+        "inference_time" : inference_time,
+        "sample_request": inference_payload,
+        "sample_response": expected_response,
+        "actual_response": inference_response,
+        "structure_match": expected_structure == actual_structure if expected_response else None,
+        "structural_difference": []
     }
 
     if not result["structure_match"]:
@@ -193,7 +204,7 @@ def main():
     """Compare expected and actual inference response structures."""
     parser = argparse.ArgumentParser()
     parser.add_argument("--inference_payload", type=str, required=True,
-                        help="Path to the expected inference response JSON file.")
+                        help="Serialized JSON payload for inference")
     parser.add_argument("--expected_response", type=str, required=False,
                         help="Path to the expected inference response JSON file.")
     parser.add_argument("--inference_response", type=str, required=True,
@@ -206,24 +217,35 @@ def main():
     args = parser.parse_args()
 
     # Load expected and actual responses.
-    inference_payload = load_json(args.inference_payload)
-    inference_response = load_json(args.inference_response)
-    if args.expected_response is None:
-        expected_response = load_json(args.expected_response)
-    else:
-        expected_response = None
+    inference_payload = load_json_from_string(args.inference_payload)
+    inference_output = load_json(args.inference_response)
+
+    expected_response = load_json(args.expected_response) if args.expected_response else None
     logger.info(f"expected response: {expected_response}, actual response: {inference_response}")
 
+    inference_response = inference_output.get("response")
+    inference_time = inference_output.get("inference_time_ms", 0)  # Default to 0 if not present
+
+    # Infer success status based on the presence of a valid response
+    success_status = inference_response is not None and bool(inference_response)
+
     if expected_response:
-        validation_result = compare_structures(inference_payload, expected_response, inference_response)
+        validation_result = validation_result = compare_structures(
+            inference_payload,
+            expected_response,
+            inference_response,
+            success_status,
+            inference_time
+        )
     else:
         validation_result = {
-            "inference_payload": inference_payload,
-            "inference_output": inference_response,
+            "success": success_status,
+            "inference_time": inference_time,
+            "sample_request": inference_payload,
+            "sample_response": expected_response,
+            "actual_response": inference_response,
             "structure_match": None,
-            "expected_structure": None,
-            "actual_structure": get_json_structure(inference_response),
-            "differences": []
+            "actual_structure": []
         }
         logger.info("No expected response provided. Skipping structure comparison.")
 

From 29aa0cf80eb03bab60e253f49bc532a3f84c6c6a Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Wed, 9 Apr 2025 13:39:16 +0000
Subject: [PATCH 10/29] maap self-serve: add validation_id/sku inputs and restructure validation result

---
 .../common/components/deploy_model/spec.yaml  |   2 +-
 assets/common/src/deploy.py                   |   7 +-
 .../run_inference_validation/spec.yaml        |  15 ++-
 .../validate_model_inference/spec.yaml        |  14 +--
 .../src/run_inference_validation.py           | 110 +++++++++++-------
 5 files changed, 89 insertions(+), 59 deletions(-)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index eea9f29b4a..71483ad276 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.12.10
+version: 0.0.12
 type: command
 
 is_deterministic: True
diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 65a097ae34..53424150cc 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -355,7 +355,7 @@ def main():
                 }
                 with open(args.inference_response, "w") as f:
                     json.dump(inference_result, f, indent=4)
-                logger.info("Saved inference response and inference time to output JSON file.")
+                logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}")
         except Exception as e:
             raise AzureMLException._with_error(
                 AzureMLError.create(OnlineEndpointInvocationError, exception=e)
@@ -379,11 +379,6 @@ def main():
         outfile.write(json_object)
     logger.info("Saved deployment details in output json file.")
 
-    if response and args.inference_response:
-        with open(args.inference_response, "w") as outfile:
-            outfile.write(response)
-        logger.info("Saved inference response in output json file.")
-
 # run script
 if __name__ == "__main__":
     # run main function
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index adf4cd3c49..7054de1bd8 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: run_inference_validation
-version: 0.0.1.17
+version: 0.0.1
 type: command
 
 is_deterministic: True
@@ -16,6 +16,8 @@ command: >-
   --inference_payload ${{inputs.inference_payload}}
   $[[--expected_response ${{inputs.expected_response}}]]
   --inference_response ${{inputs.inference_response}}
+  --validation-id ${{inputs.validation_id}}
+  --sku ${{inputs.sku}}
   --validation_result ${{outputs.validation_result}}
   --metrics_storage_uri ${{outputs.metrics_storage_uri}}
 
@@ -33,6 +35,17 @@ inputs:
     type: uri_file
     description: JSON file containing the actual inference response from the deployed model.
 
+  sku:
+    type: string
+    optional: false
+    default: "Standard_NC24ads_A100_v4"
+    description: Suggested SKU based on benchmark results
+
+  validation_id:
+    type: string
+    optional: false
+    description: ID of the validation run (used for updating status in self-serve)
+
 outputs:
   validation_result:
     type: uri_file
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index 8423b3ce28..45b1bb8575 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -4,9 +4,7 @@ type: pipeline
 name: validate_model_inference
 display_name: Validate Model Inference
 description: deploy a model and validate it using a sample payload
-version: 0.0.1.17
-
-experiment_name: validate_model
+version: 0.0.1
 
 inputs:
   compute:
@@ -135,7 +133,7 @@ outputs:
 jobs:
   online_deployment_model:
     type: command
-    component: component: azureml:deploy_model:0.0.12.9
+    component: azureml:deploy_model:0.0.12
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
@@ -154,10 +152,12 @@ jobs:
 
   run_inference_validation:
     type: command
-    component: component: azureml:run_inference_validation:0.0.1.17
+    component: azureml:run_inference_validation:0.0.1
     identity:
       type: user_identity
     inputs:
+      validation_id: ${{parent.inputs.validation_id}}
+      sku: ${{parent.inputs.instance_type}}
       inference_payload: ${{parent.inputs.inference_payload}}
       expected_response: ${{parent.inputs.expected_inference_response}}
       inference_response: ${{parent.jobs.online_deployment_model.outputs.inference_response}}
@@ -166,7 +166,7 @@ jobs:
 
   delete_endpoints:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1
+    component: azureml:delete_endpoint:0.0.7
     inputs:
       model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
@@ -176,7 +176,7 @@ jobs:
 
   publish_results:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.8
+    component: azureml:publish_validation_results_selfserve:0.0.1
     inputs:
       publisher_name: ${{parent.inputs.publisher_name}}
       model_name: ${{parent.inputs.model_name}}
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 0c5f624985..f25e4e2a72 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -3,9 +3,12 @@
 
 """Validate the structure of expected and actual inference response JSON files."""
 
+import base64
 import json
 import argparse
 import os
+import sys
+from datetime import datetime, timezone
 from azureml.core import Run
 from azureml.model.mgmt.utils.common_utils import get_mlclient
 from azureml.model.mgmt.config import AppName
@@ -59,44 +62,53 @@ def get_json_structure(data):
     else:
         return None
 
-
-def compare_structures(inference_payload, expected_response, inference_response, success_status, inference_time):
+def compare_structures(expected_response, actual_response):
     """
     Compare JSON structures (keys only) of expected and actual.
 
-    Returns a dictionary with validation results.
+    Returns a dictionary with structural differences and a match flag.
     """
     expected_structure = get_json_structure(expected_response)
-    actual_structure = get_json_structure(inference_response)
+    actual_structure = get_json_structure(actual_response)
     logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}")
 
-    result = {
-        "success": success_status,
-        "inference_time" : inference_time,
-        "sample_request": inference_payload,
-        "sample_response": expected_response,
-        "actual_response": inference_response,
-        "structure_match": expected_structure == actual_structure if expected_response else None,
-        "structural_difference": []
-    }
+    structure_match = expected_structure == actual_structure if expected_response else None
+    structural_difference = []
 
-    if not result["structure_match"]:
-        result["differences"] = [
+    if not structure_match:
+        structural_difference = [
             {"expected": expected_structure, "actual": actual_structure}
         ]
-    logger.info(f"validation result: {result}")
-    return result
 
+    logger.info(f"Structure match: {structure_match}, Structural differences: {structural_difference}")
+    return {
+        "structure_match": structure_match,
+        "structural_difference": structural_difference
+    }
 
-def save_validation_result(result, output_path):
+
+def save_validation_result(request_details, output_path, validation_id, sku, status):
     """Save validation results to a JSON file."""
     try:
+        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+        validation_result = {
+            "id": validation_id,
+            "runId": validation_id,
+            "sku": sku,
+            "createdTime": current_time,
+            "updatedTime": current_time,
+            "type": "MAAP_INFERENCING",
+            "status": status,
+            "requestDetails": request_details
+        }
+
         with open(output_path, "w") as f:
-            json.dump(result, f, indent=4)
+            json.dump(validation_result, f, indent=4)
         logger.info(f"Validation result saved to {output_path}")
     except Exception as e:
         logger.error(f"Error saving validation result: {e}")
 
+
 def replace_name_in_path(path_template, name_value):
     """Replace the placeholder in the output path with the actual job name."""
     return path_template.replace('${{name}}', name_value)
@@ -213,45 +225,55 @@ def main():
                         help="Path to save validation results.")
     parser.add_argument("--metrics_storage_uri", type=str, required=True,
                         help="Path to store the metrics.")
+    parser.add_argument("--sku", required=False,
+                        default="Standard_NC24ads_A100_v4",
+                        help="Suggested SKU based on benchmark results")
+    parser.add_argument("--validation-id", required=True,
+                        help="Run ID of the validation run")
 
     args = parser.parse_args()
 
-    # Load expected and actual responses.
-    inference_payload = load_json_from_string(args.inference_payload)
+    inference_payload = None
+    if args.inference_payload:
+        decoded_bytes = base64.b64decode(args.inference_payload)
+
+        # Convert bytes to string
+        decoded_str = decoded_bytes.decode('utf-8')
+        logger.info(f"Decoded string: {decoded_str}")
+
+        inference_payload = json.loads(decoded_str)
+
+    inference_output = load_json(args.inference_response)
+    if not inference_output:
+        logger.error("Inference output is missing or invalid.")
+        sys.exit(1)
+
     inference_output = load_json(args.inference_response)
 
     expected_response = load_json(args.expected_response) if args.expected_response else None
-    logger.info(f"expected response: {expected_response}, actual response: {inference_response}")
 
     inference_response = inference_output.get("response")
-    inference_time = inference_output.get("inference_time_ms", 0)  # Default to 0 if not present
+    inference_time = inference_output.get("inference_time", 0)
+    logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, actual response: {inference_response}")
 
     # Infer success status based on the presence of a valid response
     success_status = inference_response is not None and bool(inference_response)
-
+    status = "Success" if success_status else "Failed"
+
+    request_details = {
+        "inputRequest": inference_payload,
+        "inputResponse": expected_response,
+        "actualResponse": inference_response,
+        "responseTime": inference_time,
+        "structuralDiff": None,
+    }
     if expected_response:
-        validation_result = validation_result = compare_structures(
-            inference_payload,
-            expected_response,
-            inference_response,
-            success_status,
-            inference_time
-        )
-    else:
-        validation_result = {
-            "success": success_status,
-            "inference_time": inference_time,
-            "sample_request": inference_payload,
-            "sample_response": expected_response,
-            "actual_response": inference_response,
-            "structure_match": None,
-            "actual_structure": []
-        }
-        logger.info("No expected response provided. Skipping structure comparison.")
+        comparison_result = compare_structures(expected_response, inference_response)
+        request_details["structuralDiff"] = comparison_result.get("structural_difference", [])
 
     # Save the validation result.
-    save_validation_result(validation_result, args.validation_result)
-    logger.info(f"validation_result: {validation_result}, Validation result saved to {args.validation_result}")
+    save_validation_result(request_details, args.validation_result, args.validation_id, args.sku, status)
+    logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_result}")
 
     store_metrics_paths(args.metrics_storage_uri)
 

From fbd837166e43fc8a866c65cedf91f1b87e77e25b Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Wed, 9 Apr 2025 19:39:10 +0530
Subject: [PATCH 11/29] maap-self-serve: remove unused import and fix whitespace style issues

---
 assets/common/src/deploy.py                               | 2 +-
 .../src/publish_validation_results_selfserve.py           | 2 +-
 .../model_management/src/run_inference_validation.py      | 8 +++++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 53424150cc..6c42a3e93d 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -379,7 +379,7 @@ def main():
         outfile.write(json_object)
     logger.info("Saved deployment details in output json file.")
 
-# run script
+
 if __name__ == "__main__":
     # run main function
     main()
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index b37c11b0e0..8e012e38d0 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -5,7 +5,6 @@
 
 import sys
 import os
-import uuid
 import json
 import requests
 import argparse
@@ -31,6 +30,7 @@ def read_results_from_file(file_path):
         print(f"Error reading from file: {e}")
         return None
 
+
 def get_auth_token():
     """Generate auth token for Azure API."""
     is_obo = False
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index f25e4e2a72..23d69ad338 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -62,6 +62,7 @@ def get_json_structure(data):
     else:
         return None
 
+
 def compare_structures(expected_response, actual_response):
     """
     Compare JSON structures (keys only) of expected and actual.
@@ -113,23 +114,24 @@ def replace_name_in_path(path_template, name_value):
     """Replace the placeholder in the output path with the actual job name."""
     return path_template.replace('${{name}}', name_value)
 
+
 def fetch_storage_uri():
     """Return the storage URI of the output file from the AzureML pipeline run."""
     try:
         run = Run.get_context()
         run_details = run.get_details()
         output_data_path = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']['path']
-        
+
         output_data_uri = replace_name_in_path(output_data_path, run.id)
 
         # Extract datastore name and path from the AzureML URI
         datastore_name, path = extract_datastore_info(output_data_uri)
-        
+
         # Construct the storage URI
         storage_uri = get_storage_url(datastore_name)
         full_storage_uri = f"{storage_uri}/{path}"
         logger.info(f"Full storage URI: {full_storage_uri}")
-        
+
         return full_storage_uri
     except Exception as e:
         logger.error(f"Error fetching storage URI: {e}")

From 1b020291a7b37dbee3bf075131b01f01bf484af4 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Wed, 9 Apr 2025 19:50:00 +0530
Subject: [PATCH 12/29] maap-self-serve: wrap over-long lines in deploy and validation scripts

---
 assets/common/src/deploy.py                                 | 3 ++-
 .../src/publish_validation_results_selfserve.py             | 5 ++++-
 .../model_management/src/run_inference_validation.py        | 6 ++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 6c42a3e93d..022f785997 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -346,7 +346,8 @@ def main():
             end_time = time.time()
             inference_time_ms = int((end_time - start_time) * 1000)
 
-            logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms and response: {response}")
+            logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " +
+                        f"and response: {response}")
             # Save inference response
             if args.inference_response:
                 inference_result = {
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index 8e012e38d0..33e61f739b 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -98,7 +98,10 @@ def update_model_onboarding_version(
         "validationResult": validation_result
     }
 
-    api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31"
+    api_url = (
+        f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}"
+        f"/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31"
+    )
 
     headers = {
         "Authorization": f"Bearer {get_auth_token()}",
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 23d69ad338..96bf83ac05 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -120,7 +120,8 @@ def fetch_storage_uri():
     try:
         run = Run.get_context()
         run_details = run.get_details()
-        output_data_path = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']['path']
+        output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']
+        output_data_path = output_data['path']
 
         output_data_uri = replace_name_in_path(output_data_path, run.id)
 
@@ -256,7 +257,8 @@ def main():
 
     inference_response = inference_output.get("response")
     inference_time = inference_output.get("inference_time", 0)
-    logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, actual response: {inference_response}")
+    logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, "
+                f"actual response: {inference_response}")
 
     # Infer success status based on the presence of a valid response
     success_status = inference_response is not None and bool(inference_response)

From 277bb0284f71ec6f5174e4e5e7e3e64c090397c5 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Thu, 10 Apr 2025 10:38:58 +0530
Subject: [PATCH 13/29] maap-self-serve renamed responseTime to responseTimeMs

---
 .../training/model_management/src/run_inference_validation.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 96bf83ac05..56cb1e4f69 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -268,7 +268,7 @@ def main():
         "inputRequest": inference_payload,
         "inputResponse": expected_response,
         "actualResponse": inference_response,
-        "responseTime": inference_time,
+        "responseTimeMs": inference_time,
         "structuralDiff": None,
     }
     if expected_response:

From 5c531946c950891cfaf2815e2ebe54df26ae6678 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhisheku@microsoft.com>
Date: Sun, 13 Apr 2025 22:27:33 +0530
Subject: [PATCH 14/29] Updated the environment used for deploy model component

---
 assets/common/components/deploy_model/spec.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index 71483ad276..7cf10a71ee 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.12
+version: 0.0.12.13
 type: command
 
 is_deterministic: True
@@ -9,7 +9,7 @@ display_name: Deploy model
 description:
   Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached.
 
-environment: azureml://registries/azureml/environments/python-sdk-v2/versions/28
+environment: azureml://registries/azureml/environments/python-sdk-v2/versions/29
 
 code: ../../src
 command: >-

From efa03c2b90d6550752c8d5706e85a272c1a12e1c Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Mon, 14 Apr 2025 03:37:41 +0000
Subject: [PATCH 15/29] Added inference response validation

---
 .../common/components/deploy_model/spec.yaml  |  6 ++--
 assets/common/src/deploy.py                   |  6 ++--
 .../run_inference_validation/spec.yaml        |  2 +-
 .../validate_model_inference/spec.yaml        | 10 +++----
 .../src/run_inference_validation.py           | 28 +++++++++++++++----
 5 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index 7cf10a71ee..f97bb2f870 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.12.13
+version: 0.0.12
 type: command
 
 is_deterministic: True
@@ -36,7 +36,7 @@ command: >-
   $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]]
   $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]]
   --model_deployment_details ${{outputs.model_deployment_details}}
-  --inference_response ${{outputs.inference_response}}
+  --model_inference_response ${{outputs.model_inference_response}}
 
 inputs:
   # Output of registering component
@@ -209,7 +209,7 @@ outputs:
   model_deployment_details:
     type: uri_file
     description: Json file to which deployment details will be written
-  inference_response:
+  model_inference_response:
     type: uri_file
     description: JSON file containing inference results
 
diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 022f785997..a4896c4dd7 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -169,7 +169,7 @@ def parse_args():
         help="Json file to which deployment details will be written",
     )
     parser.add_argument(
-        "--inference_response",
+        "--model_inference_response",
         type=str,
         help="Path to the inference response JSON file.",
     )
@@ -349,12 +349,12 @@ def main():
             logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " +
                         f"and response: {response}")
             # Save inference response
-            if args.inference_response:
+            if args.model_inference_response:
                 inference_result = {
                     "response": response,
                     "inference_time": inference_time_ms
                 }
-                with open(args.inference_response, "w") as f:
+                with open(args.model_inference_response, "w") as f:
                     json.dump(inference_result, f, indent=4)
                 logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}")
         except Exception as e:
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index 7054de1bd8..3c599b548a 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -27,7 +27,7 @@ inputs:
     description: JSON input payload used for inference.
 
   expected_response:
-    type: uri_file
+    type: string
     optional: true
     description: JSON file containing the expected inference response.
 
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index 45b1bb8575..20234e5de5 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -119,8 +119,8 @@ inputs:
     optional: true
     description: ID of the validation run (used for updating status in self-serve)
 
-  expected_inference_response:
-    type: uri_file
+  inference_response:
+    type: string
     optional: true
     description: JSON file containing the expected inference response.
 
@@ -147,7 +147,7 @@ jobs:
     outputs:
       model_deployment_details:
         type: uri_file
-      inference_response:
+      model_inference_response:
         type: uri_file
 
   run_inference_validation:
@@ -159,8 +159,8 @@ jobs:
       validation_id: ${{parent.inputs.validation_id}}
       sku: ${{parent.inputs.instance_type}}
       inference_payload: ${{parent.inputs.inference_payload}}
-      expected_response: ${{parent.inputs.expected_inference_response}}
-      inference_response: ${{parent.jobs.online_deployment_model.outputs.inference_response}}
+      expected_response: ${{parent.inputs.inference_response}}
+      inference_response: ${{parent.jobs.online_deployment_model.outputs.model_inference_response}}
     outputs:
       validation_result: ${{parent.outputs.validation_result}}
 
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 56cb1e4f69..1209944ebe 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -94,7 +94,6 @@ def save_validation_result(request_details, output_path, validation_id, sku, sta
         current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
         validation_result = {
             "id": validation_id,
-            "runId": validation_id,
             "sku": sku,
             "createdTime": current_time,
             "updatedTime": current_time,
@@ -246,16 +245,32 @@ def main():
 
         inference_payload = json.loads(decoded_str)
 
+    expected_response = None
+    if args.expected_response:
+        decoded_bytes = base64.b64decode(args.expected_response)
+
+        # Convert bytes to string
+        decoded_str = decoded_bytes.decode('utf-8')
+        logger.info(f"Decoded string: {decoded_str}")
+        expected_response = json.loads(decoded_str)
+
+
     inference_output = load_json(args.inference_response)
     if not inference_output:
         logger.error("Inference output is missing or invalid.")
         sys.exit(1)
 
-    inference_output = load_json(args.inference_response)
+    inference_response = inference_output.get("response")
+    if isinstance(inference_response, str):
+        try:
+            inference_response = json.loads(inference_response)
+        except json.JSONDecodeError as e:
+            logger.warning(f"Failed to parse actualResponse as JSON: {e}")
 
-    expected_response = load_json(args.expected_response) if args.expected_response else None
+    if inference_response is None:
+        logger.warning("Actual response is missing or invalid. Setting it to an empty structure.")
+        inference_response = {}
 
-    inference_response = inference_output.get("response")
     inference_time = inference_output.get("inference_time", 0)
     logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, "
                 f"actual response: {inference_response}")
@@ -265,10 +280,11 @@ def main():
     status = "Success" if success_status else "Failed"
 
     request_details = {
-        "inputRequest": inference_payload,
-        "inputResponse": expected_response,
+        "providedRequest": inference_payload,
+        "providedResponse": expected_response,
         "actualResponse": inference_response,
         "responseTimeMs": inference_time,
+        "errorMessage": None,
         "structuralDiff": None,
     }
     if expected_response:

From 62c659f77beba227942cabd43e310eeb5ac78909 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Mon, 14 Apr 2025 09:48:49 +0000
Subject: [PATCH 16/29] Updated the validation result file extension

---
 .../src/publish_validation_results_selfserve.py   | 15 +++++++++++++--
 .../src/run_inference_validation.py               |  6 +++++-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index 33e61f739b..c327b48c1c 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -76,12 +76,23 @@ def update_model_onboarding_version(
         if metrics_path_dict.get("api_inference_path", None):
             validation_result.append({
                 "Id": validation_id,
-                "runId": validation_id,
                 "type": "API_VALIDATION",
                 "passed": True,
                 "message": "API inference passed successfully",
                 "validationResultUrl": metrics_path_dict.get("api_inference_path"),
-                "status": "success",
+                "status": "Completed",
+                "createdTime": current_time,
+                "updatedTime": current_time,
+                "sku": sku
+            })
+        else:
+            validation_result.append({
+                "Id": validation_id,
+                "type": "API_VALIDATION",
+                "passed": True,
+                "message": "API inference passed successfully",
+                "validationResultUrl": metrics_path_dict.get("api_inference_path"),
+                "status": "Failed",
                 "createdTime": current_time,
                 "updatedTime": current_time,
                 "sku": sku
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 1209944ebe..0deeb40504 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -91,6 +91,9 @@ def compare_structures(expected_response, actual_response):
 def save_validation_result(request_details, output_path, validation_id, sku, status):
     """Save validation results to a JSON file."""
     try:
+        if not output_path.endswith(".json"):
+            output_path += ".json"
+
         current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
         validation_result = {
             "id": validation_id,
@@ -122,8 +125,9 @@ def fetch_storage_uri():
         output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']
         output_data_path = output_data['path']
 
+        if not output_data_path.endswith(".json"):
+            output_data_path += ".json"
         output_data_uri = replace_name_in_path(output_data_path, run.id)
-
         # Extract datastore name and path from the AzureML URI
         datastore_name, path = extract_datastore_info(output_data_uri)
 

From 82262e90cd012c2827c49a8ca50d10ea6eee573a Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Tue, 15 Apr 2025 03:24:17 +0000
Subject: [PATCH 17/29] Download validation result with json file extension

---
 .../run_inference_validation/spec.yaml          |  2 +-
 .../src/run_inference_validation.py             | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index 3c599b548a..bbc259828f 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -48,7 +48,7 @@ inputs:
 
 outputs:
   validation_result:
-    type: uri_file
+    type: uri_folder
     description: JSON file containing the validation results.
   metrics_storage_uri:
     type: uri_file
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 0deeb40504..780e592b11 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -88,11 +88,11 @@ def compare_structures(expected_response, actual_response):
     }
 
 
-def save_validation_result(request_details, output_path, validation_id, sku, status):
+def save_validation_result(request_details, output_dir, validation_id, sku, status):
     """Save validation results to a JSON file."""
     try:
-        if not output_path.endswith(".json"):
-            output_path += ".json"
+        os.makedirs(output_dir, exist_ok=True)
+        output_path = os.path.join(output_dir, "validation_result.json")
 
         current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
         validation_result = {
@@ -125,18 +125,19 @@ def fetch_storage_uri():
         output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']
         output_data_path = output_data['path']
 
-        if not output_data_path.endswith(".json"):
-            output_data_path += ".json"
         output_data_uri = replace_name_in_path(output_data_path, run.id)
         # Extract datastore name and path from the AzureML URI
         datastore_name, path = extract_datastore_info(output_data_uri)
 
         # Construct the storage URI
         storage_uri = get_storage_url(datastore_name)
-        full_storage_uri = f"{storage_uri}/{path}"
-        logger.info(f"Full storage URI: {full_storage_uri}")
+        folder_uri = f"{storage_uri}/{path}"
+        # Construct the full path to the validation_result.json file
+        full_file_uri = f"{folder_uri}/validation_result.json"
 
-        return full_storage_uri
+        logger.info(f"Full storage URI (file): {full_file_uri}")
+
+        return full_file_uri  # This is the full path to validation_result.json
     except Exception as e:
         logger.error(f"Error fetching storage URI: {e}")
         return None

From e57a8b1a45d6c41aed52a5dd86deb1833982870b Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Wed, 16 Apr 2025 06:07:47 +0000
Subject: [PATCH 18/29] Updated the instance_type sku list

---
 assets/common/components/deploy_model/spec.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index f97bb2f870..066afde4ac 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -111,6 +111,9 @@ inputs:
       - Standard_NC24s_v2
       - Standard_NC24s_v3
       - Standard_NC24rs_v3
+      - Standard_NC24ads_A100_v4
+      - Standard_NC48ads_A100_v4
+      - Standard_NC96ads_A100_v4
       - Standard_NC64as_T4_v3
       - Standard_ND40rs_v2
       - Standard_ND96asr_v4

From 16860cd4498ef53a210639ef38a19a8e33f5a985 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Tue, 22 Apr 2025 14:16:33 +0000
Subject: [PATCH 19/29] Maap self serve save validation result fix

---
 assets/common/src/deploy.py                      |  2 +-
 .../run_inference_validation/spec.yaml           |  4 ++--
 .../validate_model_inference/spec.yaml           | 16 +++++-----------
 .../src/run_inference_validation.py              | 13 +++++++++----
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index a4896c4dd7..5960f8c1da 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -191,7 +191,7 @@ def parse_args():
 
 def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args):
     """Create endpoint and deployment and return details."""
-    endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="key")
+    endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token")
 
     # deployment
     deployment = ManagedOnlineDeployment(
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index bbc259828f..0af7fb5918 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -18,7 +18,7 @@ command: >-
   --inference_response ${{inputs.inference_response}}
   --validation-id ${{inputs.validation_id}}
   --sku ${{inputs.sku}}
-  --validation_result ${{outputs.validation_result}}
+  --validation_results ${{outputs.validation_results}}
   --metrics_storage_uri ${{outputs.metrics_storage_uri}}
 
 inputs:
@@ -47,7 +47,7 @@ inputs:
     description: ID of the validation run (used for updating status in self-serve)
 
 outputs:
-  validation_result:
+  validation_results:
     type: uri_folder
     description: JSON file containing the validation results.
   metrics_storage_uri:
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index 20234e5de5..3fcc788a81 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -65,7 +65,7 @@ inputs:
 
   model_id:
     type: string
-    optional: true 
+    optional: false
     description: |
       Asset ID of the model registered in workspace/registry.
       Registry - azureml://registries/<registry-name>/models/<model-name>/versions/<version>
@@ -100,7 +100,7 @@ inputs:
 
   inference_payload:
     type: string
-    optional: true
+    optional: false
     description: JSON payload which would be used to validate deployment
 
   endpoint_name:
@@ -126,9 +126,9 @@ inputs:
 
 # Pipeline outputs
 outputs:
-  validation_result:
+  validation_results:
     description: Output file containing the validation results.
-    type: uri_file
+    type: uri_folder
 
 jobs:
   online_deployment_model:
@@ -142,8 +142,6 @@ jobs:
       deployment_name: ${{parent.inputs.deployment_name}}
       instance_type: ${{parent.inputs.instance_type}}
       instance_count: ${{parent.inputs.instance_count}}
-    identity:
-      type: user_identity
     outputs:
       model_deployment_details:
         type: uri_file
@@ -153,8 +151,6 @@ jobs:
   run_inference_validation:
     type: command
     component: azureml:run_inference_validation:0.0.1
-    identity:
-      type: user_identity
     inputs:
       validation_id: ${{parent.inputs.validation_id}}
       sku: ${{parent.inputs.instance_type}}
@@ -162,7 +158,7 @@ jobs:
       expected_response: ${{parent.inputs.inference_response}}
       inference_response: ${{parent.jobs.online_deployment_model.outputs.model_inference_response}}
     outputs:
-      validation_result: ${{parent.outputs.validation_result}}
+      validation_results: ${{parent.outputs.validation_results}}
 
   delete_endpoints:
     type: command
@@ -170,8 +166,6 @@ jobs:
     inputs:
       model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
-    identity:
-      type: user_identity
     compute: ${{parent.inputs.compute}}
 
   publish_results:
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 780e592b11..b5012d2e15 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -91,8 +91,12 @@ def compare_structures(expected_response, actual_response):
 def save_validation_result(request_details, output_dir, validation_id, sku, status):
     """Save validation results to a JSON file."""
     try:
+        logger.info(f"Saving validation result to {output_dir}")
+        # Create the output directory if it doesn't exist
         os.makedirs(output_dir, exist_ok=True)
+        logger.info(f"Output directory: {output_dir}")
         output_path = os.path.join(output_dir, "validation_result.json")
+        logger.info(f"Output path: {output_path}")
 
         current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
         validation_result = {
@@ -110,6 +114,7 @@ def save_validation_result(request_details, output_dir, validation_id, sku, stat
         logger.info(f"Validation result saved to {output_path}")
     except Exception as e:
         logger.error(f"Error saving validation result: {e}")
+        raise Exception(f"Failed to save validation result: {e}") from e  # surface the real failure to the caller
 
 
 def replace_name_in_path(path_template, name_value):
@@ -122,7 +127,7 @@ def fetch_storage_uri():
     try:
         run = Run.get_context()
         run_details = run.get_details()
-        output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']
+        output_data = run_details['runDefinition']['outputData']['validation_results']['outputLocation']['uri']
         output_data_path = output_data['path']
 
         output_data_uri = replace_name_in_path(output_data_path, run.id)
@@ -228,7 +233,7 @@ def main():
                         help="Path to the expected inference response JSON file.")
     parser.add_argument("--inference_response", type=str, required=True,
                         help="Path to the actual inference response JSON file.")
-    parser.add_argument("--validation_result", type=str, required=True,
+    parser.add_argument("--validation_results", type=str, required=True,
                         help="Path to save validation results.")
     parser.add_argument("--metrics_storage_uri", type=str, required=True,
                         help="Path to store the metrics.")
@@ -297,8 +302,8 @@ def main():
         request_details["structuralDiff"] = comparison_result.get("structural_difference", [])
 
     # Save the validation result.
-    save_validation_result(request_details, args.validation_result, args.validation_id, args.sku, status)
-    logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_result}")
+    save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, status)
+    logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_results}")
 
     store_metrics_paths(args.metrics_storage_uri)
 

From 13861db17b03c65b4ef40e948dd7d178e5898f8e Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Tue, 22 Apr 2025 15:10:40 +0000
Subject: [PATCH 20/29] Add A100 instance types to validate_model_inference

---
 .../components/validate_model_inference/spec.yaml              | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index 3fcc788a81..3ffb775a32 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -50,6 +50,9 @@ inputs:
       - Standard_NC24s_v2
       - Standard_NC24s_v3
       - Standard_NC24rs_v3
+      - Standard_NC24ads_A100_v4
+      - Standard_NC48ads_A100_v4
+      - Standard_NC96ads_A100_v4
       - Standard_NC64as_T4_v3
       - Standard_ND40rs_v2
       - Standard_ND96asr_v4

From d41d268ef090b6aefb2a070a24b7a13ea0323ef7 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Wed, 7 May 2025 12:30:36 +0000
Subject: [PATCH 21/29] Publish validation result for both success and failure
 case

---
 .../spec.yaml                                 |  4 +--
 .../run_inference_validation/spec.yaml        |  3 +-
 .../publish_validation_results_selfserve.py   | 34 ++++++++-----------
 .../src/run_inference_validation.py           | 28 ++++++++-------
 4 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
index 531eb689a0..6eba5e8153 100644
--- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
+++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
@@ -39,7 +39,7 @@ inputs:
     description: ID of the validation run (used for updating status in self-serve)
   metrics_storage_uri:
     type: uri_file
-    optional: false
+    optional: true
     mode: ro_mount
     description: Path to the file containing the validation metrics csv storage path
 
@@ -53,4 +53,4 @@ command: >-
   --publisher-name ${{inputs.publisher_name}} 
   --validation-id ${{inputs.validation_id}} 
   --sku ${{inputs.sku}} 
-  --metrics-storage-uri ${{inputs.metrics_storage_uri}}
\ No newline at end of file
+  $[[ --metrics-storage-uri ${{inputs.metrics_storage_uri}}]]
\ No newline at end of file
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index 0af7fb5918..9b05530d46 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -15,7 +15,7 @@ command: >-
   python run_inference_validation.py
   --inference_payload ${{inputs.inference_payload}}
   $[[--expected_response ${{inputs.expected_response}}]]
-  --inference_response ${{inputs.inference_response}}
+  $[[--inference_response ${{inputs.inference_response}}]]
   --validation-id ${{inputs.validation_id}}
   --sku ${{inputs.sku}}
   --validation_results ${{outputs.validation_results}}
@@ -33,6 +33,7 @@ inputs:
 
   inference_response:
     type: uri_file
+    optional: true
     description: JSON file containing the actual inference response from the deployed model.
 
   sku:
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index c327b48c1c..727efdbf44 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -67,39 +67,33 @@ def update_model_onboarding_version(
 ):
     """Update model onboarding version with benchmark results."""
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    metrics_path_dict = read_results_from_file(metrics_storage_uri)
+    if not metrics_storage_uri:
+        validation_success = False
+        metrics_url = None
+    else:
+        metrics_path_dict = read_results_from_file(metrics_storage_uri)
+        metrics_url = metrics_path_dict.get(
+                "api_inference_path") if metrics_path_dict else None
+        validation_success = metrics_url is not None
 
     validation_result = []
+    logger.info(f"validation_success: {validation_success}, metrics_url: {metrics_url}, metrics_storage_uri: {metrics_storage_uri}")
 
     if validation_id:
-        if metrics_path_dict.get("api_inference_path", None):
-            validation_result.append({
-                "Id": validation_id,
-                "type": "API_VALIDATION",
-                "passed": True,
-                "message": "API inference passed successfully",
-                "validationResultUrl": metrics_path_dict.get("api_inference_path"),
-                "status": "Completed",
-                "createdTime": current_time,
-                "updatedTime": current_time,
-                "sku": sku
-            })
-        else:
-            validation_result.append({
+        validation_result.append({
                 "Id": validation_id,
                 "type": "API_VALIDATION",
                 "passed": True,
                 "message": "API inference passed successfully",
-                "validationResultUrl": metrics_path_dict.get("api_inference_path"),
-                "status": "Failed",
+                "validationResultUrl": metrics_url,
+                "status": "Completed" if validation_success else "Failed",
                 "createdTime": current_time,
                 "updatedTime": current_time,
                 "sku": sku
             })
     else:
         logger.error(
-            "Validation  ID is None, not updating validation results in self-serve")
+            "Validation ID is None, not updating validation results in self-serve")
         sys.exit(1)
 
     payload = {
@@ -157,7 +151,7 @@ def update_model_onboarding_version(
                         help="Base URL of the model publisher self-serve API")
     parser.add_argument("--validation-id", required=True,
                         help="Run ID of the validation run")
-    parser.add_argument("--metrics-storage-uri", required=True,
+    parser.add_argument("--metrics-storage-uri", required=False,
                         help="URI to the storage where validation metrics are stored")
     parser.add_argument("--sku", required=False,
                         default="Standard_NC24ads_A100_v4",
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index b5012d2e15..9cbfccc33b 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -231,7 +231,7 @@ def main():
                         help="Serialized JSON payload for inference")
     parser.add_argument("--expected_response", type=str, required=False,
                         help="Path to the expected inference response JSON file.")
-    parser.add_argument("--inference_response", type=str, required=True,
+    parser.add_argument("--inference_response", type=str, required=False,
                         help="Path to the actual inference response JSON file.")
     parser.add_argument("--validation_results", type=str, required=True,
                         help="Path to save validation results.")
@@ -264,24 +264,26 @@ def main():
         logger.info(f"Decoded string: {decoded_str}")
         expected_response = json.loads(decoded_str)
 
+    inference_output = None
+    if args.inference_response:
+        inference_output = load_json(args.inference_response)
+        if not inference_output:
+            logger.error("Inference response is missing or invalid.")
 
-    inference_output = load_json(args.inference_response)
-    if not inference_output:
-        logger.error("Inference output is missing or invalid.")
-        sys.exit(1)
-
-    inference_response = inference_output.get("response")
-    if isinstance(inference_response, str):
-        try:
-            inference_response = json.loads(inference_response)
-        except json.JSONDecodeError as e:
-            logger.warning(f"Failed to parse actualResponse as JSON: {e}")
+    inference_response = None
+    if inference_output:
+        inference_response = inference_output.get("response")
+        if isinstance(inference_response, str):
+            try:
+                inference_response = json.loads(inference_response)
+            except json.JSONDecodeError as e:
+                logger.warning(f"Failed to parse actualResponse as JSON: {e}")
 
     if inference_response is None:
         logger.warning("Actual response is missing or invalid. Setting it to an empty structure.")
         inference_response = {}
 
-    inference_time = inference_output.get("inference_time", 0)
+    inference_time = inference_output.get("inference_time", 0) if inference_output else 0
     logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, "
                 f"actual response: {inference_response}")
 

From 298b076636f66d66395401a818b0ce5a12513be0 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Wed, 7 May 2025 15:41:41 +0000
Subject: [PATCH 22/29] Publish validation result for both success and failure
 case

---
 .../src/run_inference_validation.py           | 41 ++++++++++---------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 9cbfccc33b..de83072f07 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -46,21 +46,21 @@ def load_json_from_string(json_string):
         return None
 
 
-def get_json_structure(data):
+def get_json_structure(data, parent_key=''):
     """
-    Recursively extract the structure of JSON (keys only).
-
-    For dictionaries, returns a dict of keys mapped to their structure.
-    For lists, returns a list with the structure of the first element.
-    For other types, returns None.
+    Recursively extract key paths from nested JSON.
     """
+    keys = set()
     if isinstance(data, dict):
-        return {key: get_json_structure(value) for key, value in data.items()}
-    elif isinstance(data, list) and len(data) > 0:
-        # Assume all elements share the same structure and return the structure of the first element.
-        return [get_json_structure(data[0])]
-    else:
-        return None
+        for k, v in data.items():
+            full_key = f"{parent_key}.{k}" if parent_key else k
+            keys.add(full_key)
+            keys.update(get_json_structure(v, full_key))
+    elif isinstance(data, list):
+        for index, item in enumerate(data):
+            full_key = f"{parent_key}[{index}]"
+            keys.update(get_json_structure(item, full_key))
+    return keys
 
 
 def compare_structures(expected_response, actual_response):
@@ -71,17 +71,20 @@ def compare_structures(expected_response, actual_response):
     """
     expected_structure = get_json_structure(expected_response)
     actual_structure = get_json_structure(actual_response)
-    logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}")
+    logger.info(f"Expected structure: {expected_structure}")
+    logger.info(f"Actual structure: {actual_structure}")
 
-    structure_match = expected_structure == actual_structure if expected_response else None
-    structural_difference = []
+    added_keys = actual_structure - expected_structure
+    removed_keys = expected_structure - actual_structure
+    structure_match = not added_keys and not removed_keys
 
-    if not structure_match:
-        structural_difference = [
-            {"expected": expected_structure, "actual": actual_structure}
-        ]
+    structural_difference = {
+        "added_keys": sorted(list(added_keys)),
+        "removed_keys": sorted(list(removed_keys)),
+    }
 
     logger.info(f"Structure match: {structure_match}, Structural differences: {structural_difference}")
+
     return {
         "structure_match": structure_match,
         "structural_difference": structural_difference

From 47e3422c42a035353e33df02c658d633794a0e1b Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Sat, 10 May 2025 18:46:25 +0000
Subject: [PATCH 23/29] Maap Self serve validation AML pipeline error message
 update

---
 .../common/components/deploy_model/spec.yaml  |   6 +-
 assets/common/src/deploy.py                   | 239 ++++++++-------
 .../spec.yaml                                 |  13 +-
 .../run_inference_validation/spec.yaml        |  12 +-
 .../validate_model_inference/spec.yaml        |  16 +-
 .../publish_validation_results_selfserve.py   |  28 +-
 .../src/run_inference_validation.py           | 285 ++++++++++++------
 7 files changed, 394 insertions(+), 205 deletions(-)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index 066afde4ac..03c2370004 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.12
+version: 0.0.12.27
 type: command
 
 is_deterministic: True
@@ -37,6 +37,7 @@ command: >-
   $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]]
   --model_deployment_details ${{outputs.model_deployment_details}}
   --model_inference_response ${{outputs.model_inference_response}}
+  --deploy_error ${{outputs.deploy_error}}
 
 inputs:
   # Output of registering component
@@ -215,6 +216,9 @@ outputs:
   model_inference_response:
     type: uri_file
     description: JSON file containing inference results
+  deploy_error:
+    type: uri_file
+    description: File containing error messages or stack traces from the deployment step.
 
 tags:
     Preview: ""
diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 5960f8c1da..646373cb3d 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -7,6 +7,7 @@
 import re
 import time
 import base64
+import traceback
 
 from azure.ai.ml.entities import (
     ManagedOnlineEndpoint,
@@ -173,6 +174,11 @@ def parse_args():
         type=str,
         help="Path to the inference response JSON file.",
     )
+    parser.add_argument(
+        "--deploy_error",
+        type=str,
+        help="Path to the file where deployment error messages or stack traces are written.",
+    )
     # parse args
     args = parser.parse_args()
     logger.info(f"Args received {args}")
@@ -269,116 +275,145 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen
 def main():
     """Run main function."""
     args = parse_args()
+    logger.info(f"Arguments: {args}")
     ml_client = get_mlclient()
+    error_message = "None"
     # get registered model id
 
-    if args.model_id:
-        model_id = str(args.model_id)
-    elif args.registration_details_folder:
-        registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE
-        if registration_details_file.exists():
-            try:
-                with open(registration_details_file) as f:
-                    model_info = json.load(f)
-                model_id = model_info["id"]
-            except Exception as e:
-                raise Exception(f"model_registration_details json file is missing model information {e}.")
+    try:
+        if args.model_deployment_details:
+            with open(args.model_deployment_details, "w") as outfile:
+                json.dump({}, outfile)
+
+        if args.model_inference_response:
+            with open(args.model_inference_response, "w") as f:
+                json.dump({}, f, indent=4)
+
+        if args.deploy_error:
+            with open(args.deploy_error, "w") as error_file:
+                error_file.write(error_message)
+
+        if args.model_id:
+            model_id = str(args.model_id)
+        elif args.registration_details_folder:
+            registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE
+            if registration_details_file.exists():
+                try:
+                    with open(registration_details_file) as f:
+                        model_info = json.load(f)
+                    model_id = model_info["id"]
+                except Exception as e:
+                    raise Exception(f"model_registration_details json file is missing model information {e}.")
+            else:
+                raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.")
         else:
-            raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.")
-    else:
-        raise Exception("Arguments model_id and registration_details both are missing.")
-
-    # Endpoint has following restrictions:
-    # 1. Name must begin with lowercase letter
-    # 2. Followed by lowercase letters, hyphen or numbers
-    # 3. End with a lowercase letter or number
-
-    # 1. Replace underscores and slashes by hyphens and convert them to lower case.
-    # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it
-    model_name = get_model_name(model_id)
-
-    endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21]
-    endpoint_name = f"{endpoint_name}-{int(time.time())}"
-    endpoint_name = endpoint_name
-
-    endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name
-    deployment_name = args.deployment_name if args.deployment_name else "default"
-
-    endpoint, deployment = create_endpoint_and_deployment(
-        ml_client=ml_client,
-        endpoint_name=endpoint_name,
-        deployment_name=deployment_name,
-        model_id=model_id,
-        args=args
-    )
-
-    response = None
-    if args.inference_payload or args.inference_payload_str:
-        print("Invoking inference with test payload ...")
-        try:
-            start_time = time.time()
-            if args.inference_payload_str:
-                print(f"Inference payload string: {args.inference_payload_str}")
-                decoded_bytes = base64.b64decode(args.inference_payload_str)
-
-                # Convert bytes to string
-                decoded_str = decoded_bytes.decode('utf-8')
-                logger.info(f"Decoded string: {decoded_str}")
-
-                payload = json.loads(decoded_str)
-                logger.info(f"Payload:\n {payload}")
-
-                with open("payload.json", "w") as temp_file:
-                    json.dump(payload, temp_file)
+            raise Exception("Arguments model_id and registration_details both are missing.")
+
+        # Endpoint has following restrictions:
+        # 1. Name must begin with lowercase letter
+        # 2. Followed by lowercase letters, hyphen or numbers
+        # 3. End with a lowercase letter or number
+
+        # 1. Replace underscores and slashes by hyphens and convert them to lower case.
+        # 2. Take 21 chars from model name and append '-' & timestamp(10chars) to it
+        model_name = get_model_name(model_id)
+
+        endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21]
+        endpoint_name = f"{endpoint_name}-{int(time.time())}"
+        endpoint_name = endpoint_name
+
+        endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name
+        deployment_name = args.deployment_name if args.deployment_name else "default"
+
+        endpoint, deployment = create_endpoint_and_deployment(
+            ml_client=ml_client,
+            endpoint_name=endpoint_name,
+            deployment_name=deployment_name,
+            model_id=model_id,
+            args=args
+        )
 
-                response = ml_client.online_endpoints.invoke(
-                    endpoint_name=endpoint_name,
-                    deployment_name=deployment_name,
-                    request_file="payload.json",
-                )
-            elif args.inference_payload:
-                response = ml_client.online_endpoints.invoke(
-                    endpoint_name=endpoint_name,
-                    deployment_name=deployment_name,
-                    request_file=args.inference_payload,
+        response = None
+        if args.inference_payload or args.inference_payload_str:
+            print("Invoking inference with test payload ...")
+            try:
+                start_time = time.time()
+                if args.inference_payload_str:
+                    print(f"Inference payload string: {args.inference_payload_str}")
+                    decoded_bytes = base64.b64decode(args.inference_payload_str)
+
+                    # Convert bytes to string
+                    decoded_str = decoded_bytes.decode('utf-8')
+                    logger.info(f"Decoded string: {decoded_str}")
+
+                    payload = json.loads(decoded_str)
+                    logger.info(f"Payload:\n {payload}")
+
+                    with open("payload.json", "w") as temp_file:
+                        json.dump(payload, temp_file)
+
+                    response = ml_client.online_endpoints.invoke(
+                        endpoint_name=endpoint_name,
+                        deployment_name=deployment_name,
+                        request_file="payload.json",
+                    )
+                elif args.inference_payload:
+                    response = ml_client.online_endpoints.invoke(
+                        endpoint_name=endpoint_name,
+                        deployment_name=deployment_name,
+                        request_file=args.inference_payload,
+                    )
+
+                end_time = time.time()
+                inference_time_ms = int((end_time - start_time) * 1000)
+
+                logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " +
+                            f"and response: {response}")
+                # Save inference response
+                if args.model_inference_response:
+                    inference_result = {
+                        "response": response,
+                        "inference_time": inference_time_ms
+                    }
+                    with open(args.model_inference_response, "w") as f:
+                        json.dump(inference_result, f, indent=4)
+                    logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}")
+            except Exception as e:
+                raise AzureMLException._with_error(
+                    AzureMLError.create(OnlineEndpointInvocationError, exception=e)
                 )
 
-            end_time = time.time()
-            inference_time_ms = int((end_time - start_time) * 1000)
-
-            logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " +
-                        f"and response: {response}")
-            # Save inference response
-            if args.model_inference_response:
-                inference_result = {
-                    "response": response,
-                    "inference_time": inference_time_ms
-                }
-                with open(args.model_inference_response, "w") as f:
-                    json.dump(inference_result, f, indent=4)
-                logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}")
-        except Exception as e:
-            raise AzureMLException._with_error(
-                AzureMLError.create(OnlineEndpointInvocationError, exception=e)
-            )
+        print("Saving deployment details ...")
+
+        # write deployment details to file
+        endpoint_type = "aml_online_inference"
+        deployment_details = {
+            "endpoint_name": endpoint.name,
+            "deployment_name": deployment.name,
+            "endpoint_uri": endpoint.__dict__["_scoring_uri"],
+            "endpoint_type": endpoint_type,
+            "instance_type": args.instance_type,
+            "instance_count": args.instance_count,
+            "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance,
+        }
+        json_object = json.dumps(deployment_details, indent=4)
+        with open(args.model_deployment_details, "w") as outfile:
+            outfile.write(json_object)
+        logger.info("Saved deployment details in output json file.")
 
-    print("Saving deployment details ...")
-
-    # write deployment details to file
-    endpoint_type = "aml_online_inference"
-    deployment_details = {
-        "endpoint_name": endpoint.name,
-        "deployment_name": deployment.name,
-        "endpoint_uri": endpoint.__dict__["_scoring_uri"],
-        "endpoint_type": endpoint_type,
-        "instance_type": args.instance_type,
-        "instance_count": args.instance_count,
-        "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance,
-    }
-    json_object = json.dumps(deployment_details, indent=4)
-    with open(args.model_deployment_details, "w") as outfile:
-        outfile.write(json_object)
-    logger.info("Saved deployment details in output json file.")
+    except Exception as e:
+        # Capture the full traceback
+        stack_trace = traceback.format_exc()
+        error_message = f"Model deployment failed.\n{stack_trace}"
+        logger.error(f"error_message from stack trace: {error_message}, deploy_error_path: {args.deploy_error}")
+
+        # Write the error message to the specified error output file
+        if args.deploy_error:
+            with open(args.deploy_error, "w") as error_file:
+                error_file.write(error_message)
+
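+        # The exception is NOT re-raised (raise left disabled below), so this step exits successfully; presumably so downstream steps can consume deploy_error - TODO confirm.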
+        # raise Exception(error_message)
 
 
 if __name__ == "__main__":
diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
index 6eba5e8153..1c9a80bd95 100644
--- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
+++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
@@ -3,7 +3,7 @@ type: command
 is_deterministic: true
 
 name: publish_validation_results_selfserve
-version: 0.0.1
+version: 0.0.1.22
 display_name: Publish model validation results to Self-Serve
 description: |
   This component publishes model validation results to the Self-Serve database.
@@ -42,6 +42,14 @@ inputs:
     optional: true
     mode: ro_mount
     description: Path to the file containing the validation metrics csv storage path
+  # deploy_error:
+  #   type: uri_file
+  #   optional: true
+  #   description: Error message or stack trace from the deployment step
+  validation_error:
+    type: uri_file
+    optional: true
+    description: Error message or stack trace from the inference validation step
 
 code: ../../src
 
@@ -53,4 +61,5 @@ command: >-
   --publisher-name ${{inputs.publisher_name}} 
   --validation-id ${{inputs.validation_id}} 
   --sku ${{inputs.sku}} 
-  $[[ --metrics-storage-uri ${{inputs.metrics_storage_uri}}]]
\ No newline at end of file
+  $[[ --metrics-storage-uri ${{inputs.metrics_storage_uri}}]]
+  $[[ --validation-error ${{inputs.validation_error}}]]
\ No newline at end of file
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index 9b05530d46..bf8a25e1b0 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: run_inference_validation
-version: 0.0.1
+version: 0.0.1.65
 type: command
 
 is_deterministic: True
@@ -16,10 +16,12 @@ command: >-
   --inference_payload ${{inputs.inference_payload}}
   $[[--expected_response ${{inputs.expected_response}}]]
   $[[--inference_response ${{inputs.inference_response}}]]
+  $[[--deployment_error ${{inputs.deployment_error}}]]
   --validation-id ${{inputs.validation_id}}
   --sku ${{inputs.sku}}
   --validation_results ${{outputs.validation_results}}
   --metrics_storage_uri ${{outputs.metrics_storage_uri}}
+  --validation_error ${{outputs.validation_error}}
 
 inputs:
   inference_payload:
@@ -46,6 +48,11 @@ inputs:
     type: string
     optional: false
     description: ID of the validation run (used for updating status in self-serve)
+  
+  deployment_error:
+    type: uri_file
+    optional: true
+    description: Error message or stack trace from the deployment step
 
 outputs:
   validation_results:
@@ -54,6 +61,9 @@ outputs:
   metrics_storage_uri:
     type: uri_file
     description: JSON file containing the validation metrics csv storage path
+  validation_error:
+    type: uri_file
+    description: File containing error messages or stack traces from the validation step.
 
 tags:
     Preview: ""
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index 3ffb775a32..a9c14275a8 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -4,7 +4,7 @@ type: pipeline
 name: validate_model_inference
 display_name: Validate Model Inference
 description: deploy a model and validate it using a sample payload
-version: 0.0.1
+version: 0.0.1.74
 
 inputs:
   compute:
@@ -136,7 +136,7 @@ outputs:
 jobs:
   online_deployment_model:
     type: command
-    component: azureml:deploy_model:0.0.12
+    component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.27
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
@@ -150,22 +150,27 @@ jobs:
         type: uri_file
       model_inference_response:
         type: uri_file
+      deploy_error:
+        type: uri_file
 
   run_inference_validation:
     type: command
-    component: azureml:run_inference_validation:0.0.1
+    component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.65
     inputs:
       validation_id: ${{parent.inputs.validation_id}}
       sku: ${{parent.inputs.instance_type}}
       inference_payload: ${{parent.inputs.inference_payload}}
       expected_response: ${{parent.inputs.inference_response}}
       inference_response: ${{parent.jobs.online_deployment_model.outputs.model_inference_response}}
+      deployment_error: ${{parent.jobs.online_deployment_model.outputs.deploy_error}}
     outputs:
       validation_results: ${{parent.outputs.validation_results}}
+      validation_error:
+        type: uri_file
 
   delete_endpoints:
     type: command
-    component: azureml:delete_endpoint:0.0.7
+    component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1
     inputs:
       model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
@@ -173,7 +178,7 @@ jobs:
 
   publish_results:
     type: command
-    component: azureml:publish_validation_results_selfserve:0.0.1
+    component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.22
     inputs:
       publisher_name: ${{parent.inputs.publisher_name}}
       model_name: ${{parent.inputs.model_name}}
@@ -182,3 +187,4 @@ jobs:
       validation_id: ${{parent.inputs.validation_id}}
       selfserve_base_url: ${{parent.inputs.selfserve_base_url}}
       metrics_storage_uri: ${{parent.jobs.run_inference_validation.outputs.metrics_storage_uri}}
+      validation_error: ${{parent.jobs.run_inference_validation.outputs.validation_error}}
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index 727efdbf44..11779360d0 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -63,7 +63,8 @@ def update_model_onboarding_version(
     sku,
     validation_id,
     selfserve_base_url,
-    metrics_storage_uri
+    metrics_storage_uri,
+    error_message
 ):
     """Update model onboarding version with benchmark results."""
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -86,6 +87,7 @@ def update_model_onboarding_version(
                 "passed": True,
                 "message": "API inference passed successfully",
                 "validationResultUrl": metrics_url,
+                "errorMessage": error_message if error_message else None,
                 "status": "Completed" if validation_success else "Failed",
                 "createdTime": current_time,
                 "updatedTime": current_time,
@@ -156,10 +158,31 @@ def update_model_onboarding_version(
     parser.add_argument("--sku", required=False,
                         default="Standard_NC24ads_A100_v4",
                         help="Suggested SKU based on benchmark results")
+    # parser.add_argument("--deploy-error", required=False,
+    #                     help="Path to the file containing deployment error messages or stack traces")
+    parser.add_argument("--validation-error", required=False,
+                        help="Path to the file containing validation error messages or stack traces")
 
     args = parser.parse_args()
     logger.info(f"Arguments: {args}")
 
+    error_message = ""
+    # if args.deploy_error:
+    #     try:
+    #         with open(args.deploy_error, "r") as f:
+    #             deploy_error_message = f.read().strip()
+    #             error_message += f"Deployment Error: {deploy_error_message}\n"
+    #     except Exception as e:
+    #         logger.warning(f"Failed to read deploy_error file: {e}")
+
+    if args.validation_error:
+        try:
+            with open(args.validation_error, "r") as f:
+                validation_error_message = f.read().strip()
+                error_message += f"Validation Error: {validation_error_message}\n"
+        except Exception as e:
+            logger.warning(f"Failed to read validation_error file: {e}")
+
     try:
         result = update_model_onboarding_version(
             args.publisher_name,
@@ -168,7 +191,8 @@ def update_model_onboarding_version(
             args.sku,
             args.validation_id,
             args.selfserve_base_url,
-            args.metrics_storage_uri
+            args.metrics_storage_uri,
+            error_message
         )
         logger.info("Model onboarding version update completed successfully")
     except Exception as e:
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index de83072f07..71c47ff5ed 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -8,6 +8,8 @@
 import argparse
 import os
 import sys
+import traceback
+import re
 from datetime import datetime, timezone
 from azureml.core import Run
 from azureml.model.mgmt.utils.common_utils import get_mlclient
@@ -46,50 +48,104 @@ def load_json_from_string(json_string):
         return None
 
 
-def get_json_structure(data, parent_key=''):
+def set_nested_value(d, keys, value):
     """
-    Recursively extract key paths from nested JSON.
+    Set `value` inside the nested dict/list `d` at the path `keys` (str = dict key, int = list index).
     """
-    keys = set()
+    for i, key in enumerate(keys):
+        is_last = i == len(keys) - 1
+        if isinstance(key, int):
+            # Pad the list so that index `key` exists before it is read or assigned.
+            while len(d) <= key:
+                d.append({} if not is_last else None)
+            child = d[key]
+        else:
+            child = d.get(key)
+        if is_last:
+            d[key] = value
+            return
+        # The NEXT key decides the container type: a list for an index, a dict for a name.
+        # Creating a dict unconditionally would break on `append` once a list index follows.
+        wanted = list if isinstance(keys[i + 1], int) else dict
+        if not isinstance(child, wanted):
+            child = d[key] = wanted()
+        d = child
+
+def parse_key_path(key):
+    """
+    Convert a key path such as '[0].a.b[1]' into [0, 'a', 'b', 1]; a name ends at '.' or '['.
+    """
+    parts = re.findall(r'\[(\d+)\]|([^.\[\]]+)', key)
+    return [int(index) if index else name for index, name in parts]
+
+def build_nested_json(flat_dict):
+    """
+    Converts a flat key-path dictionary to nested JSON.
+    """
+    result = {} if flat_dict else None
+    for key_path, value in flat_dict.items():
+        keys = parse_key_path(key_path)
+        if isinstance(keys[0], int):
+            if not isinstance(result, list):
+                result = []
+        set_nested_value(result, keys, value)
+    return result
+
+def get_json_structure_with_values(data, parent_key=''):
+    """
+    Recursively extract key paths and their values from nested JSON.
+    Returns a dictionary of full_key_path: value
+    """
+    items = {}
     if isinstance(data, dict):
         for k, v in data.items():
             full_key = f"{parent_key}.{k}" if parent_key else k
-            keys.add(full_key)
-            keys.update(get_json_structure(v, full_key))
+            if isinstance(v, (dict, list)):
+                items.update(get_json_structure_with_values(v, full_key))
+            else:
+                items[full_key] = v
     elif isinstance(data, list):
         for index, item in enumerate(data):
-            full_key = f"{parent_key}[{index}]"
-            keys.update(get_json_structure(item, full_key))
-    return keys
-
+            full_key = f"{parent_key}[{index}]" if parent_key else f"[{index}]"
+            if isinstance(item, (dict, list)):
+                items.update(get_json_structure_with_values(item, full_key))
+            else:
+                items[full_key] = item
+    return items
 
 def compare_structures(expected_response, actual_response):
     """
-    Compare JSON structures (keys only) of expected and actual.
-
-    Returns a dictionary with structural differences and a match flag.
+    Compare JSON structures and return full nested added/removed diffs.
     """
-    expected_structure = get_json_structure(expected_response)
-    actual_structure = get_json_structure(actual_response)
-    logger.info(f"Expected structure: {expected_structure}")
-    logger.info(f"Actual structure: {actual_structure}")
-
-    added_keys = actual_structure - expected_structure
-    removed_keys = expected_structure - actual_structure
-    structure_match = not added_keys and not removed_keys
-
-    structural_difference = {
-        "added_keys": sorted(list(added_keys)),
-        "removed_keys": sorted(list(removed_keys)),
-    }
+    expected_structure = get_json_structure_with_values(expected_response)
+    actual_structure = get_json_structure_with_values(actual_response)
 
-    logger.info(f"Structure match: {structure_match}, Structural differences: {structural_difference}")
+    logger.info(f"Expected flat structure: {expected_structure}")
+    logger.info(f"Actual flat structure: {actual_structure}")
 
-    return {
+    added_keys = actual_structure.keys() - expected_structure.keys()
+    removed_keys = expected_structure.keys() - actual_structure.keys()
+
+    added_flat = {key: actual_structure[key] for key in added_keys}
+    removed_flat = {key: expected_structure[key] for key in removed_keys}
+
+    added_nested = build_nested_json(added_flat)
+    removed_nested = build_nested_json(removed_flat)
+
+    structure_match = not added_flat and not removed_flat
+
+    result = {
         "structure_match": structure_match,
-        "structural_difference": structural_difference
+        "structural_difference": {
+            "added": added_nested,
+            "removed": removed_nested
+        }
     }
 
+    logger.info("Comparison result:")
+    logger.info(json.dumps(result, indent=4))
+
+    return result
 
 def save_validation_result(request_details, output_dir, validation_id, sku, status):
     """Save validation results to a JSON file."""
@@ -227,6 +283,109 @@ def extract_datastore_info(datastore_uri_path):
     return None, None
 
 
+def run_inference_validation(args):
+    """Perform the inference validation logic."""
+    try:
+        error_message = ""
+        if args.deployment_error:
+            try:
+                with open(args.deployment_error, "r") as f:
+                    deployment_error = f.read().strip()
+                    error_message += deployment_error
+            except Exception as e:
+                logger.warning(f"Failed to read deployment_error file: {e}")
+
+        if args.validation_error:
+            with open(args.validation_error, "w") as error_file:
+                error_file.write(error_message)
+        inference_payload = None
+        if args.inference_payload:
+            decoded_bytes = base64.b64decode(args.inference_payload)
+
+            # Convert bytes to string
+            decoded_str = decoded_bytes.decode('utf-8')
+            logger.info(f"Decoded string: {decoded_str}")
+
+            inference_payload = json.loads(decoded_str)
+
+        expected_response = None
+        if args.expected_response:
+            decoded_bytes = base64.b64decode(args.expected_response)
+
+            # Convert bytes to string
+            decoded_str = decoded_bytes.decode('utf-8')
+            logger.info(f"Decoded string: {decoded_str}")
+            expected_response = json.loads(decoded_str)
+
+        inference_output = None
+        if args.inference_response:
+            inference_output = load_json(args.inference_response)
+            if not inference_output:
+                logger.error("Inference response is missing or invalid.")
+
+        inference_response = None
+        if inference_output:
+            inference_response = inference_output.get("response")
+            if isinstance(inference_response, str):
+                try:
+                    inference_response = json.loads(inference_response)
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Failed to parse actualResponse as JSON: {e}")
+
+        if inference_response is None:
+            logger.warning("Actual response is missing or invalid. Setting it to an empty structure.")
+            inference_response = {}
+
+        inference_time = inference_output.get("inference_time", 0) if inference_output else 0
+        logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, "
+                    f"actual response: {inference_response}")
+
+        # Infer success status based on the presence of a valid response
+        success_status = inference_response is not None and bool(inference_response)
+        status = "Success" if success_status else "Failed"
+
+        request_details = {
+            "providedRequest": inference_payload,
+            "providedResponse": expected_response,
+            "actualResponse": inference_response,
+            "responseTimeMs": inference_time,
+            "errorMessage": error_message,
+            "structuralDiff": None,
+        }
+        logger.info(f"Request details: {request_details}")
+        if expected_response and inference_response:
+            comparison_result = compare_structures(expected_response, inference_response)
+            request_details["structuralDiff"] = comparison_result.get("structural_difference", [])
+
+        # Save the validation result.
+        save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, status)
+        logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_results}")
+
+        store_metrics_paths(args.metrics_storage_uri)
+    except Exception as e:
+        stack_trace = traceback.format_exc()
+        error_message = f"Model validation failed.\n{stack_trace}"
+        logger.error(error_message)
+        # Save the error message in the request details
+        request_details = {
+            "providedRequest": None,
+            "providedResponse": None,
+            "actualResponse": None,
+            "responseTimeMs": 0,
+            "errorMessage": error_message,
+            "structuralDiff": None,
+        }
+
+        # Save the validation result with the error message
+        save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, "Failed")
+
+        # Write the error message to the specified error output file
+        if args.validation_error:
+            with open(args.validation_error, "w") as error_file:
+                error_file.write(error_message)
+        # raise Exception(f"Failed to run inference validation: {error_message}")
+
+
 def main():
     """Compare expected and actual inference response structures."""
     parser = argparse.ArgumentParser()
@@ -236,6 +395,8 @@ def main():
                         help="Path to the expected inference response JSON file.")
     parser.add_argument("--inference_response", type=str, required=False,
                         help="Path to the actual inference response JSON file.")
+    parser.add_argument("--deployment_error", type=str, required=False,
+                        help="Path to the deployment_error.")
     parser.add_argument("--validation_results", type=str, required=True,
                         help="Path to save validation results.")
     parser.add_argument("--metrics_storage_uri", type=str, required=True,
@@ -245,72 +406,12 @@ def main():
                         help="Suggested SKU based on benchmark results")
     parser.add_argument("--validation-id", required=True,
                         help="Run ID of the validation run")
+    parser.add_argument("--validation_error", type=str, required=False,
+                        help="Path to the file where error messages or stack traces will be written.")
 
     args = parser.parse_args()
-
-    inference_payload = None
-    if args.inference_payload:
-        decoded_bytes = base64.b64decode(args.inference_payload)
-
-        # Convert bytes to string
-        decoded_str = decoded_bytes.decode('utf-8')
-        logger.info(f"Decoded string: {decoded_str}")
-
-        inference_payload = json.loads(decoded_str)
-
-    expected_response = None
-    if args.expected_response:
-        decoded_bytes = base64.b64decode(args.expected_response)
-
-        # Convert bytes to string
-        decoded_str = decoded_bytes.decode('utf-8')
-        logger.info(f"Decoded string: {decoded_str}")
-        expected_response = json.loads(decoded_str)
-
-    inference_output = None
-    if args.inference_response:
-        inference_output = load_json(args.inference_response)
-        if not inference_output:
-            logger.error("Inference response is missing or invalid.")
-
-    inference_response = None
-    if inference_output:
-        inference_response = inference_output.get("response")
-        if isinstance(inference_response, str):
-            try:
-                inference_response = json.loads(inference_response)
-            except json.JSONDecodeError as e:
-                logger.warning(f"Failed to parse actualResponse as JSON: {e}")
-
-    if inference_response is None:
-        logger.warning("Actual response is missing or invalid. Setting it to an empty structure.")
-        inference_response = {}
-
-    inference_time = inference_output.get("inference_time", 0) if inference_output else 0
-    logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, "
-                f"actual response: {inference_response}")
-
-    # Infer success status based on the presence of a valid response
-    success_status = inference_response is not None and bool(inference_response)
-    status = "Success" if success_status else "Failed"
-
-    request_details = {
-        "providedRequest": inference_payload,
-        "providedResponse": expected_response,
-        "actualResponse": inference_response,
-        "responseTimeMs": inference_time,
-        "errorMessage": None,
-        "structuralDiff": None,
-    }
-    if expected_response:
-        comparison_result = compare_structures(expected_response, inference_response)
-        request_details["structuralDiff"] = comparison_result.get("structural_difference", [])
-
-    # Save the validation result.
-    save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, status)
-    logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_results}")
-
-    store_metrics_paths(args.metrics_storage_uri)
+    logger.info(f"Arguments: {args}")
+    run_inference_validation(args)
 
 
 if __name__ == "__main__":

From c1211a9859b8f7da9ab3487f1d444508a807d729 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Sun, 11 May 2025 13:58:58 +0000
Subject: [PATCH 24/29] MaaP Self serve validation AML pipeline error message
 update

---
 assets/common/components/deploy_model/spec.yaml       |  2 +-
 assets/common/src/deploy.py                           | 11 +++++------
 .../publish_validation_results_selfserve/spec.yaml    |  6 +-----
 .../components/run_inference_validation/spec.yaml     |  2 +-
 .../components/validate_model_inference/spec.yaml     |  9 ++++-----
 .../src/publish_validation_results_selfserve.py       | 10 ----------
 .../model_management/src/run_inference_validation.py  | 10 +++++++---
 7 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
index 03c2370004..f4678e2426 100644
--- a/assets/common/components/deploy_model/spec.yaml
+++ b/assets/common/components/deploy_model/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: deploy_model
-version: 0.0.12.27
+version: 0.0.12.29
 type: command
 
 is_deterministic: True
diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 646373cb3d..204b04623e 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -274,13 +274,11 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen
 @swallow_all_exceptions(logger)
 def main():
     """Run main function."""
-    args = parse_args()
-    logger.info(f"Arguments: {args}")
-    ml_client = get_mlclient()
-    error_message = "None"
-    # get registered model id
-
     try:
+        args = parse_args()
+        logger.info(f"Arguments: {args}")
+        ml_client = get_mlclient()
+        error_message = ""
         if args.model_deployment_details:
             with open(args.model_deployment_details, "w") as outfile:
                 json.dump({}, outfile)
@@ -293,6 +291,7 @@ def main():
             with open(args.deploy_error, "w") as error_file:
                 error_file.write(error_message)
 
+        # get registered model id
         if args.model_id:
             model_id = str(args.model_id)
         elif args.registration_details_folder:
diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
index 1c9a80bd95..498fde2dc1 100644
--- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
+++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
@@ -3,7 +3,7 @@ type: command
 is_deterministic: true
 
 name: publish_validation_results_selfserve
-version: 0.0.1.22
+version: 0.0.1.23
 display_name: Publish model validation results to Self-Serve
 description: |
   This component publishes model validation results to the Self-Serve database.
@@ -42,10 +42,6 @@ inputs:
     optional: true
     mode: ro_mount
     description: Path to the file containing the validation metrics csv storage path
-  # deploy_error:
-  #   type: uri_file
-  #   optional: true
-  #   description: Error message or stack trace from the deployment step
   validation_error:
     type: uri_file
     optional: true
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index bf8a25e1b0..c5e9f5cfb0 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: run_inference_validation
-version: 0.0.1.65
+version: 0.0.1.66
 type: command
 
 is_deterministic: True
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index a9c14275a8..731b6cb4be 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -4,7 +4,7 @@ type: pipeline
 name: validate_model_inference
 display_name: Validate Model Inference
 description: deploy a model and validate it using a sample payload
-version: 0.0.1.74
+version: 0.0.1.76
 
 inputs:
   compute:
@@ -136,7 +136,7 @@ outputs:
 jobs:
   online_deployment_model:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.27
+    component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.29
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
@@ -155,7 +155,7 @@ jobs:
 
   run_inference_validation:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.65
+    component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.66
     inputs:
       validation_id: ${{parent.inputs.validation_id}}
       sku: ${{parent.inputs.instance_type}}
@@ -174,11 +174,10 @@ jobs:
     inputs:
       model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
-    compute: ${{parent.inputs.compute}}
 
   publish_results:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.22
+    component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.23
     inputs:
       publisher_name: ${{parent.inputs.publisher_name}}
       model_name: ${{parent.inputs.model_name}}
diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py
index 11779360d0..e6fa644f6e 100644
--- a/assets/training/model_management/src/publish_validation_results_selfserve.py
+++ b/assets/training/model_management/src/publish_validation_results_selfserve.py
@@ -158,8 +158,6 @@ def update_model_onboarding_version(
     parser.add_argument("--sku", required=False,
                         default="Standard_NC24ads_A100_v4",
                         help="Suggested SKU based on benchmark results")
-    # parser.add_argument("--deploy-error", required=False,
-    #                     help="Path to the file containing deployment error messages or stack traces")
     parser.add_argument("--validation-error", required=False,
                         help="Path to the file containing validation error messages or stack traces")
 
@@ -167,14 +165,6 @@ def update_model_onboarding_version(
     logger.info(f"Arguments: {args}")
 
     error_message = ""
-    # if args.deploy_error:
-    #     try:
-    #         with open(args.deploy_error, "r") as f:
-    #             deploy_error_message = f.read().strip()
-    #             error_message += f"Deployment Error: {deploy_error_message}\n"
-    #     except Exception as e:
-    #         logger.warning(f"Failed to read deploy_error file: {e}")
-
     if args.validation_error:
         try:
             with open(args.validation_error, "r") as f:
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 71c47ff5ed..0997b01521 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -283,9 +283,10 @@ def extract_datastore_info(datastore_uri_path):
     return None, None
 
 
-def run_inference_validation(args):
+def run_inference_validation():
     """Perform the inference validation logic."""
     try:
+        args = parse_args()
         error_message = ""
         if args.deployment_error:
             try:
@@ -385,8 +386,11 @@ def run_inference_validation(args):
                 error_file.write(error_message)
         # raise Exception(f"Failed to run inference validation: {error_message}")
 
-
 def main():
+    run_inference_validation()
+
+
+def parse_args():
     """Compare expected and actual inference response structures."""
     parser = argparse.ArgumentParser()
     parser.add_argument("--inference_payload", type=str, required=True,
@@ -411,7 +415,7 @@ def main():
 
     args = parser.parse_args()
     logger.info(f"Arguments: {args}")
-    run_inference_validation(args)
+    return args
 
 
 if __name__ == "__main__":

From 3ed891ba8bad2ef941c30c2f0a56775bf6ac4f91 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Tue, 13 May 2025 09:25:13 +0000
Subject: [PATCH 25/29] MaaP Self serve validation AML pipeline error message
 update

---
 assets/training/model_management/src/run_inference_validation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 0997b01521..39164a0eb4 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -379,6 +379,7 @@ def run_inference_validation():
 
         # Save the validation result with the error message
         save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, "Failed")
+        store_metrics_paths(args.metrics_storage_uri)
 
         # Write the error message to the specified error output file
         if args.validation_error:

From 6faef71d3395f148ef40039a474b5afaee9d3def Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Mon, 19 May 2025 07:26:30 +0000
Subject: [PATCH 26/29] AML MaaP Self serve validation pipeline refactoring

---
 .../deploy_inference_model/spec.yaml          | 225 ++++++++++
 assets/common/src/deploy_inference_model.py   | 417 ++++++++++++++++++
 .../validate_model_inference/spec.yaml        |  10 +-
 .../src/run_inference_validation.py           |   5 +-
 4 files changed, 649 insertions(+), 8 deletions(-)
 create mode 100644 assets/common/components/deploy_inference_model/spec.yaml
 create mode 100644 assets/common/src/deploy_inference_model.py

diff --git a/assets/common/components/deploy_inference_model/spec.yaml b/assets/common/components/deploy_inference_model/spec.yaml
new file mode 100644
index 0000000000..efae05fea4
--- /dev/null
+++ b/assets/common/components/deploy_inference_model/spec.yaml
@@ -0,0 +1,225 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+name: deploy_inference_model
+version: 0.0.1
+type: command
+
+is_deterministic: True
+
+display_name: Deploy model
+description:
+  Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached.
+
+environment: azureml://registries/azureml/environments/python-sdk-v2/versions/29
+
+code: ../../src
+command: >-
+  python deploy_inference_model.py
+  $[[--registration_details_folder ${{inputs.registration_details_folder}}]]
+  $[[--model_id ${{inputs.model_id}}]]
+  $[[--inference_payload ${{inputs.inference_payload}}]]
+  $[[--inference_payload_str ${{inputs.inference_payload_str}}]]
+  $[[--endpoint_name ${{inputs.endpoint_name}}]]
+  $[[--deployment_name ${{inputs.deployment_name}}]]
+  $[[--instance_type ${{inputs.instance_type}}]]
+  $[[--instance_count ${{inputs.instance_count}}]]
+  $[[--max_concurrent_requests_per_instance ${{inputs.max_concurrent_requests_per_instance}}]] 
+  $[[--request_timeout_ms ${{inputs.request_timeout_ms}}]]
+  $[[--max_queue_wait_ms ${{inputs.max_queue_wait_ms}}]]
+  $[[--failure_threshold_readiness_probe ${{inputs.failure_threshold_readiness_probe}}]]
+  $[[--success_threshold_readiness_probe ${{inputs.success_threshold_readiness_probe}}]]
+  $[[--timeout_readiness_probe ${{inputs.timeout_readiness_probe}}]]
+  $[[--period_readiness_probe ${{inputs.period_readiness_probe}}]]
+  $[[--initial_delay_readiness_probe ${{inputs.initial_delay_readiness_probe}}]]
+  $[[--failure_threshold_liveness_probe ${{inputs.failure_threshold_liveness_probe}}]]
+  $[[--timeout_liveness_probe ${{inputs.timeout_liveness_probe}}]]
+  $[[--period_liveness_probe ${{inputs.period_liveness_probe}}]]
+  $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]]
+  $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]]
+  --model_deployment_details ${{outputs.model_deployment_details}}
+  --model_inference_response ${{outputs.model_inference_response}}
+  --deploy_error ${{outputs.deploy_error}}
+
+inputs:
+  # Output of registering component
+  registration_details_folder:
+    type: uri_folder
+    optional: true
+    description: Folder containing model registration details in a JSON file named model_registration_details.json
+
+  model_id:
+    type: string
+    optional: true 
+    description: |
+      Asset ID of the model registered in workspace/registry.
+      Registry - azureml://registries/<registry-name>/models/<model-name>/versions/<version>
+      Workspace - azureml:<model-name>:<version>
+
+  inference_payload:
+    type: uri_file
+    optional: true
+    description: JSON payload which would be used to validate deployment
+
+  inference_payload_str:
+    type: string
+    optional: true
+    description: Serialized JSON payload which would be used to validate deployment
+
+  endpoint_name:
+    type: string
+    optional: true
+    description: Name of the endpoint
+
+  deployment_name:
+    type: string
+    optional: true
+    default: default
+    description: Name of the deployment
+  
+  instance_type:
+    type: string
+    optional: true
+    enum:
+      - Standard_DS1_v2
+      - Standard_DS2_v2
+      - Standard_DS3_v2
+      - Standard_DS4_v2
+      - Standard_DS5_v2
+      - Standard_F2s_v2
+      - Standard_F4s_v2
+      - Standard_F8s_v2
+      - Standard_F16s_v2
+      - Standard_F32s_v2
+      - Standard_F48s_v2
+      - Standard_F64s_v2
+      - Standard_F72s_v2
+      - Standard_FX24mds
+      - Standard_FX36mds
+      - Standard_FX48mds
+      - Standard_E2s_v3
+      - Standard_E4s_v3
+      - Standard_E8s_v3
+      - Standard_E16s_v3
+      - Standard_E32s_v3
+      - Standard_E48s_v3
+      - Standard_E64s_v3
+      - Standard_NC4as_T4_v3
+      - Standard_NC6s_v2
+      - Standard_NC6s_v3
+      - Standard_NC8as_T4_v3
+      - Standard_NC12s_v2
+      - Standard_NC12s_v3
+      - Standard_NC16as_T4_v3
+      - Standard_NC24s_v2
+      - Standard_NC24s_v3
+      - Standard_NC24rs_v3
+      - Standard_NC24ads_A100_v4
+      - Standard_NC48ads_A100_v4
+      - Standard_NC96ads_A100_v4
+      - Standard_NC64as_T4_v3
+      - Standard_ND40rs_v2
+      - Standard_ND96asr_v4
+      - Standard_ND96amsr_A100_v4
+    default: Standard_NC24s_v3
+    description: Compute instance type to deploy the model. Make sure that the instance type is available and has enough quota.
+
+  instance_count:
+    type: integer
+    optional: true
+    default: 1
+    description: Number of instances you want to use for deployment. Make sure the instance type has enough quota available.
+
+  max_concurrent_requests_per_instance:
+    type: integer
+    default: 1
+    optional: true
+    description: Maximum concurrent requests to be handled per instance
+
+  request_timeout_ms:
+    type: integer
+    default: 60000
+    optional: true
+    description: Request timeout in ms. Max limit is 90000.
+
+  max_queue_wait_ms:
+    type: integer
+    default: 60000
+    optional: true
+    description: Maximum queue wait time of a request in ms
+  
+  failure_threshold_readiness_probe:
+    type: integer
+    default: 10
+    optional: true 
+    description: The number of times system will try after failing the readiness probe
+
+  success_threshold_readiness_probe:
+    type: integer
+    default: 1
+    optional: true 
+    description: The minimum consecutive successes for the readiness probe to be considered successful after having failed
+  
+  timeout_readiness_probe:
+    type: integer
+    default: 10
+    optional: true
+    description: The number of seconds after which the readiness probe times out
+
+  period_readiness_probe:
+    type: integer
+    default: 10
+    optional: true
+    description: How often (in seconds) to perform the readiness probe
+
+  initial_delay_readiness_probe:
+    type: integer
+    default: 10
+    optional: true
+    description: The number of seconds after the container has started before the readiness probe is initiated
+
+  failure_threshold_liveness_probe:
+    type: integer
+    default: 30
+    optional: true 
+    description: The number of times system will try after failing the liveness probe
+  
+  timeout_liveness_probe:
+    type: integer
+    default: 10
+    optional: true
+    description: The number of seconds after which the liveness probe times out
+
+  period_liveness_probe:
+    type: integer
+    default: 10
+    optional: true 
+    description:  How often (in seconds) to perform the liveness probe
+
+  initial_delay_liveness_probe:
+    type: integer
+    default: 10
+    optional: true
+    description: The number of seconds after the container has started before the liveness probe is initiated
+  
+  egress_public_network_access:
+    type: string
+    default: enabled
+    optional: true 
+    enum:
+      - enabled
+      - disabled
+    description: Setting it to disabled secures the deployment by restricting communication between the deployment and the Azure resources used by it
+
+outputs:
+  model_deployment_details:
+    type: uri_file
+    description: Json file to which deployment details will be written
+  model_inference_response:
+    type: uri_file
+    description: JSON file containing inference results
+  deploy_error:
+    type: uri_file
+    description: File containing error messages or stack traces from the deployment step.
+
+tags:
+    Preview: ""
+    Internal: ""
diff --git a/assets/common/src/deploy_inference_model.py b/assets/common/src/deploy_inference_model.py
new file mode 100644
index 0000000000..7230258fe5
--- /dev/null
+++ b/assets/common/src/deploy_inference_model.py
@@ -0,0 +1,417 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Run Model deployment module."""
+import argparse
+import json
+import re
+import time
+import base64
+import traceback
+
+from azure.ai.ml.entities import (
+    ManagedOnlineEndpoint,
+    ManagedOnlineDeployment,
+    OnlineRequestSettings,
+    ProbeSettings,
+)
+from azureml._common._error_definition import AzureMLError
+from azureml._common.exceptions import AzureMLException
+from pathlib import Path
+
+from utils.config import AppName, ComponentVariables
+from utils.common_utils import get_mlclient, get_model_name
+from utils.logging_utils import custom_dimensions, get_logger
+from utils.exceptions import (
+    swallow_all_exceptions,
+    OnlineEndpointInvocationError,
+    EndpointCreationError,
+    DeploymentCreationError,
+)
+
+
+MAX_REQUEST_TIMEOUT = 90000
+MAX_INSTANCE_COUNT = 20
+MAX_DEPLOYMENT_LOG_TAIL_LINES = 10000
+
+logger = get_logger(__name__)
+custom_dimensions.app_name = AppName.DEPLOY_MODEL
+
+
+def parse_args():
+    """Parse the deployment command-line arguments and validate the request settings."""
+    parser = argparse.ArgumentParser()
+
+    # Defaults for managed online endpoint have been picked mostly from:
+    # https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-deployment-managed-online
+    # Some of the defaults have been tweaked to cater to large models.
+
+    # add arguments
+    parser.add_argument(
+        "--registration_details_folder",
+        type=Path,
+        help="Folder containing model registration details in a JSON file named model_registration_details.json",
+    )
+    parser.add_argument(
+        "--model_id",
+        type=str,
+        help="Registered mlflow model id",
+    )
+    parser.add_argument(
+        "--inference_payload",
+        type=Path,
+        help="Json file with inference endpoint payload.",
+    )
+    parser.add_argument(
+        "--inference_payload_str",
+        type=str,
+        help="Serialized JSON payload for inference.",
+    )
+    parser.add_argument(
+        "--endpoint_name",
+        type=str,
+        help="Name of the endpoint",
+    )
+    parser.add_argument("--deployment_name", type=str, help="Name of the the deployment")
+    parser.add_argument(
+        "--instance_type",
+        type=str,
+        help="Compute instance type to deploy model",
+        default="Standard_NC24s_v3",
+    )
+    parser.add_argument(
+        "--instance_count",
+        type=int,
+        help="Number of compute instances to deploy model",
+        default=1,
+        choices=range(1, MAX_INSTANCE_COUNT + 1),  # range() excludes its stop, so add 1 to allow the maximum
+    )
+    parser.add_argument(
+        "--max_concurrent_requests_per_instance",
+        type=int,
+        default=1,
+        help="Maximum concurrent requests to be handled per instance",
+    )
+    parser.add_argument(
+        "--request_timeout_ms",
+        type=int,
+        default=60000,  # 1min
+        help="Request timeout in ms.",
+    )
+    parser.add_argument(
+        "--max_queue_wait_ms",
+        type=int,
+        default=60000,  # 1min
+        help="Maximum queue wait time of a request in ms",
+    )
+    parser.add_argument(
+        "--failure_threshold_readiness_probe",
+        type=int,
+        default=10,
+        help="No of times system will try after failing the readiness probe",
+    )
+    parser.add_argument(
+        "--success_threshold_readiness_probe",
+        type=int,
+        default=1,
+        help="The minimum consecutive successes for the readiness probe to be considered successful, after fail",
+    )
+    parser.add_argument(
+        "--timeout_readiness_probe",
+        type=int,
+        default=10,
+        help="The number of seconds after which the readiness probe times out",
+    )
+    parser.add_argument(
+        "--period_readiness_probe",
+        type=int,
+        default=10,
+        help="How often (in seconds) to perform the readiness probe",
+    )
+    parser.add_argument(
+        "--initial_delay_readiness_probe",
+        type=int,
+        default=10,
+        help="The number of seconds after the container has started before the readiness probe is initiated",
+    )
+    parser.add_argument(
+        "--failure_threshold_liveness_probe",
+        type=int,
+        default=30,
+        help="No of times system will try after failing the liveness probe",
+    )
+    parser.add_argument(
+        "--timeout_liveness_probe",
+        type=int,
+        default=10,
+        help="The number of seconds after which the liveness probe times out",
+    )
+    parser.add_argument(
+        "--period_liveness_probe",
+        type=int,
+        default=10,
+        help="How often (in seconds) to perform the liveness probe",
+    )
+    parser.add_argument(
+        "--initial_delay_liveness_probe",
+        type=int,
+        default=10,
+        help="The number of seconds after the container has started before the liveness probe is initiated",
+    )
+    parser.add_argument(
+        "--egress_public_network_access",
+        type=str,
+        default="enabled",
+        help="Secures the deployment by restricting interaction between deployment and Azure resources used by it",
+    )
+    parser.add_argument(
+        "--model_deployment_details",
+        type=str,
+        help="Json file to which deployment details will be written",
+    )
+    parser.add_argument(
+        "--model_inference_response",
+        type=str,
+        help="Path to the inference response JSON file.",
+    )
+    parser.add_argument(
+        "--deploy_error",
+        type=str,
+        help="Path to the file to which deployment error messages and stack traces are written.",
+    )
+    # parse args
+    args = parser.parse_args()
+    logger.info(f"Args received {args}")
+    print("args received ", args)
+
+    # Validating passed input values
+    if args.max_concurrent_requests_per_instance < 1:
+        raise Exception("Arg max_concurrent_requests_per_instance cannot be less than 1")
+    if args.request_timeout_ms < 1 or args.request_timeout_ms > MAX_REQUEST_TIMEOUT:
+        raise Exception(f"Arg request_timeout_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}")
+    if args.max_queue_wait_ms < 1 or args.max_queue_wait_ms > MAX_REQUEST_TIMEOUT:
+        raise Exception(f"Arg max_queue_wait_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}")
+
+    return args
+
+
+def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args):
+    """Create the endpoint and deployment, route all traffic to the deployment, and return both."""
+    endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token")
+
+    # deployment
+    deployment = ManagedOnlineDeployment(
+        name=deployment_name,
+        endpoint_name=endpoint_name,
+        model=model_id,
+        instance_type=args.instance_type,
+        instance_count=args.instance_count,
+        request_settings=OnlineRequestSettings(
+            max_concurrent_requests_per_instance=args.max_concurrent_requests_per_instance,
+            request_timeout_ms=args.request_timeout_ms,
+            max_queue_wait_ms=args.max_queue_wait_ms,
+        ),
+        liveness_probe=ProbeSettings(
+            failure_threshold=args.failure_threshold_liveness_probe,
+            timeout=args.timeout_liveness_probe,
+            period=args.period_liveness_probe,
+            initial_delay=args.initial_delay_liveness_probe,
+        ),
+        readiness_probe=ProbeSettings(
+            failure_threshold=args.failure_threshold_readiness_probe,
+            success_threshold=args.success_threshold_readiness_probe,
+            timeout=args.timeout_readiness_probe,
+            period=args.period_readiness_probe,
+            initial_delay=args.initial_delay_readiness_probe,
+        ),
+        egress_public_network_access=args.egress_public_network_access,
+    )
+
+    try:
+        logger.info(f"Creating endpoint {endpoint_name}")
+        ml_client.begin_create_or_update(endpoint).wait()
+        endpoint = ml_client.online_endpoints.get(endpoint.name)
+        logger.info(f"Endpoint created {endpoint.id}")
+    except Exception as e:
+        raise AzureMLException._with_error(
+            AzureMLError.create(EndpointCreationError, exception=e)
+        ) from e
+
+    try:
+        logger.info(f"Creating deployment {deployment}")
+        ml_client.online_deployments.begin_create_or_update(deployment).wait()
+    except Exception as e:
+        try:  # best effort: a failure to fetch the logs must not hide the deployment error
+            logger.error("Deployment failed. Printing deployment logs")
+            logs = ml_client.online_deployments.get_logs(
+                name=deployment_name,
+                endpoint_name=endpoint_name,
+                lines=MAX_DEPLOYMENT_LOG_TAIL_LINES
+            )
+            logger.error(logs)
+        except Exception as ex:
+            logger.error(f"Error in fetching deployment logs: {ex}")
+
+        raise AzureMLException._with_error(
+            AzureMLError.create(DeploymentCreationError, exception=e)
+        ) from e
+
+    logger.info(f"Deployment successful. Updating endpoint to take 100% traffic for deployment {deployment_name}")
+
+    # deployment to take 100% traffic
+    endpoint.traffic = {deployment.name: 100}
+    try:
+        ml_client.begin_create_or_update(endpoint).wait()
+        endpoint = ml_client.online_endpoints.get(endpoint.name)
+    except Exception as e:
+        error_msg = f"Error occured while updating endpoint traffic. Deployment should be usable. Exception - {e}"
+        raise Exception(error_msg) from e
+
+    logger.info(f"Endpoint updated to take 100% traffic for deployment {deployment_name}")
+    return endpoint, deployment
+
+
+@swallow_all_exceptions(logger)
+def main():
+    """Deploy the model, optionally invoke it with a test payload, and write the output files."""
+    args = None  # stays None when parse_args() itself raises; the handler below checks for that
+    try:
+        args = parse_args()
+        logger.info(f"Arguments: {args}")
+        ml_client = get_mlclient()
+        if args.model_deployment_details:  # every requested output starts as an empty placeholder
+            with open(args.model_deployment_details, "w") as outfile:
+                json.dump({}, outfile)
+
+        if args.model_inference_response:
+            with open(args.model_inference_response, "w") as f:
+                json.dump({}, f, indent=4)
+
+        if args.deploy_error:
+            with open(args.deploy_error, "w") as error_file:
+                error_file.write("")
+
+        # get registered model id
+        if args.model_id:
+            model_id = str(args.model_id)
+        elif args.registration_details_folder:
+            registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE
+            if registration_details_file.exists():
+                try:
+                    with open(registration_details_file) as f:
+                        model_info = json.load(f)
+                    model_id = model_info["id"]
+                except Exception as e:
+                    raise Exception(f"model_registration_details json file is missing model information {e}.") from e
+            else:
+                raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.")
+        else:
+            raise Exception("Arguments model_id and registration_details both are missing.")
+
+        # Endpoint has following restrictions:
+        # 1. Name must begin with lowercase letter
+        # 2. Followed by lowercase letters, hyphen or numbers
+        # 3. End with a lowercase letter or number
+
+        # 1. Replace underscores and slashes by hyphens and convert them to lower case.
+        # 2. Take 21 chars from model name and append '-' & timestamp(10chars) to it
+        model_name = get_model_name(model_id)
+
+        endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21]
+        endpoint_name = f"{endpoint_name}-{int(time.time())}"
+
+        endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name
+        deployment_name = args.deployment_name if args.deployment_name else "default"
+
+        endpoint, deployment = create_endpoint_and_deployment(
+            ml_client=ml_client,
+            endpoint_name=endpoint_name,
+            deployment_name=deployment_name,
+            model_id=model_id,
+            args=args
+        )
+
+        response = None
+        if args.inference_payload or args.inference_payload_str:
+            print("Invoking inference with test payload ...")
+            try:
+                start_time = time.time()
+                if args.inference_payload_str:  # the string payload takes precedence over the payload file
+                    print(f"Inference payload string: {args.inference_payload_str}")
+                    decoded_bytes = base64.b64decode(args.inference_payload_str)
+
+                    # Convert bytes to string
+                    decoded_str = decoded_bytes.decode('utf-8')
+                    logger.info(f"Decoded string: {decoded_str}")
+
+                    payload = json.loads(decoded_str)
+                    logger.info(f"Payload:\n {payload}")
+
+                    with open("payload.json", "w") as temp_file:
+                        json.dump(payload, temp_file)
+
+                    response = ml_client.online_endpoints.invoke(
+                        endpoint_name=endpoint_name,
+                        deployment_name=deployment_name,
+                        request_file="payload.json",
+                    )
+                elif args.inference_payload:
+                    response = ml_client.online_endpoints.invoke(
+                        endpoint_name=endpoint_name,
+                        deployment_name=deployment_name,
+                        request_file=args.inference_payload,
+                    )
+
+                end_time = time.time()
+                inference_time_ms = int((end_time - start_time) * 1000)
+
+                logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " +
+                            f"and response: {response}")
+                # Save inference response
+                if args.model_inference_response:
+                    inference_result = {
+                        "response": response,
+                        "inference_time": inference_time_ms
+                    }
+                    with open(args.model_inference_response, "w") as f:
+                        json.dump(inference_result, f, indent=4)
+                    logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}")
+            except Exception as e:
+                raise AzureMLException._with_error(
+                    AzureMLError.create(OnlineEndpointInvocationError, exception=e)
+                ) from e
+
+        print("Saving deployment details ...")
+
+        # write deployment details to file
+        endpoint_type = "aml_online_inference"
+        deployment_details = {
+            "endpoint_name": endpoint.name,
+            "deployment_name": deployment.name,
+            "endpoint_uri": endpoint.__dict__["_scoring_uri"],
+            "endpoint_type": endpoint_type,
+            "instance_type": args.instance_type,
+            "instance_count": args.instance_count,
+            "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance,
+        }
+        if args.model_deployment_details:  # optional output; same guard as the placeholder write above
+            with open(args.model_deployment_details, "w") as outfile:
+                json.dump(deployment_details, outfile, indent=4)
+            logger.info("Saved deployment details in output json file.")
+
+    except Exception:  # not re-raised here; the failure is recorded in the deploy_error file instead
+        # Capture the full traceback
+        stack_trace = traceback.format_exc()
+        error_message = f"Model deployment failed.\n{stack_trace}"
+        deploy_error_path = args.deploy_error if args else None
+        logger.error(f"error_message: {error_message}, deploy_error_path: {deploy_error_path}")
+
+        # Write the error message to the specified error output file
+        if deploy_error_path:
+            with open(deploy_error_path, "w") as error_file:
+                error_file.write(error_message)
+
+
+if __name__ == "__main__":
+    # run main function
+    main()
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index 731b6cb4be..b754f283e3 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -4,7 +4,7 @@ type: pipeline
 name: validate_model_inference
 display_name: Validate Model Inference
 description: deploy a model and validate it using a sample payload
-version: 0.0.1.76
+version: 0.0.1
 
 inputs:
   compute:
@@ -136,7 +136,7 @@ outputs:
 jobs:
   online_deployment_model:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.29
+    component: azureml:deploy_inference_model:0.0.1
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
@@ -155,7 +155,7 @@ jobs:
 
   run_inference_validation:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.66
+    component: azureml:run_inference_validation:0.0.1
     inputs:
       validation_id: ${{parent.inputs.validation_id}}
       sku: ${{parent.inputs.instance_type}}
@@ -170,14 +170,14 @@ jobs:
 
   delete_endpoints:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1
+    component: azureml:delete_endpoint:0.0.7
     inputs:
       model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
 
   publish_results:
     type: command
-    component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.23
+    component: azureml:publish_validation_results_selfserve:0.0.1
     inputs:
       publisher_name: ${{parent.inputs.publisher_name}}
       model_name: ${{parent.inputs.model_name}}
diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py
index 39164a0eb4..afc9df212f 100644
--- a/assets/training/model_management/src/run_inference_validation.py
+++ b/assets/training/model_management/src/run_inference_validation.py
@@ -294,7 +294,7 @@ def run_inference_validation():
                     deployment_error = f.read().strip()
                     error_message += deployment_error
             except Exception as e:
-                logger.warning(f"Failed to read validation_error file: {e}")
+                logger.warning(f"Failed to read deployment_error file: {e}")
 
         if args.validation_error:
             with open(args.validation_error, "w") as error_file:
@@ -331,7 +331,7 @@ def run_inference_validation():
                 try:
                     inference_response = json.loads(inference_response)
                 except json.JSONDecodeError as e:
-                    logger.warning(f"Failed to parse actualResponse as JSON: {e}")
+                    logger.warning(f"Failed to parse actual response as JSON: {e}")
 
         if inference_response is None:
             logger.warning("Actual response is missing or invalid. Setting it to an empty structure.")
@@ -385,7 +385,6 @@ def run_inference_validation():
         if args.validation_error:
             with open(args.validation_error, "w") as error_file:
                 error_file.write(error_message)
-        # raise Exception(f"Failed to run inference validation: {error_message}")
 
 def main():
     run_inference_validation()

From b301dddbb01fb93ea732e59c937bb685b7dfe17d Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Mon, 19 May 2025 10:32:31 +0000
Subject: [PATCH 27/29] reverted deploy_model component

---
 assets/common/src/deploy.py | 227 ++++++++++++------------------------
 1 file changed, 74 insertions(+), 153 deletions(-)

diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py
index 204b04623e..c0558feebe 100644
--- a/assets/common/src/deploy.py
+++ b/assets/common/src/deploy.py
@@ -6,8 +6,6 @@
 import json
 import re
 import time
-import base64
-import traceback
 
 from azure.ai.ml.entities import (
     ManagedOnlineEndpoint,
@@ -62,11 +60,6 @@ def parse_args():
         type=Path,
         help="Json file with inference endpoint payload.",
     )
-    parser.add_argument(
-        "--inference_payload_str",
-        type=str,
-        help="Serialized JSON payload for inference.",
-    )
     parser.add_argument(
         "--endpoint_name",
         type=str,
@@ -169,16 +162,6 @@ def parse_args():
         type=str,
         help="Json file to which deployment details will be written",
     )
-    parser.add_argument(
-        "--model_inference_response",
-        type=str,
-        help="Path to the inference response JSON file.",
-    )
-    parser.add_argument(
-        "--deploy_error",
-        type=str,
-        help="Path to the inference response JSON file.",
-    )
     # parse args
     args = parser.parse_args()
     logger.info(f"Args received {args}")
@@ -197,7 +180,7 @@ def parse_args():
 
 def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args):
     """Create endpoint and deployment and return details."""
-    endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token")
+    endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="key")
 
     # deployment
     deployment = ManagedOnlineDeployment(
@@ -274,147 +257,85 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen
 @swallow_all_exceptions(logger)
 def main():
     """Run main function."""
-    try:
-        args = parse_args()
-        logger.info(f"Arguments: {args}")
-        ml_client = get_mlclient()
-        error_message = ""
-        if args.model_deployment_details:
-            with open(args.model_deployment_details, "w") as outfile:
-                json.dump({}, outfile)
-
-        if args.model_inference_response:
-            with open(args.model_inference_response, "w") as f:
-                json.dump({}, f, indent=4)
-
-        if args.deploy_error:
-            with open(args.deploy_error, "w") as error_file:
-                error_file.write(error_message)
-
-        # get registered model id
-        if args.model_id:
-            model_id = str(args.model_id)
-        elif args.registration_details_folder:
-            registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE
-            if registration_details_file.exists():
-                try:
-                    with open(registration_details_file) as f:
-                        model_info = json.load(f)
-                    model_id = model_info["id"]
-                except Exception as e:
-                    raise Exception(f"model_registration_details json file is missing model information {e}.")
-            else:
-                raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.")
-        else:
-            raise Exception("Arguments model_id and registration_details both are missing.")
-
-        # Endpoint has following restrictions:
-        # 1. Name must begin with lowercase letter
-        # 2. Followed by lowercase letters, hyphen or numbers
-        # 3. End with a lowercase letter or number
-
-        # 1. Replace underscores and slashes by hyphens and convert them to lower case.
-        # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it
-        model_name = get_model_name(model_id)
-
-        endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21]
-        endpoint_name = f"{endpoint_name}-{int(time.time())}"
-        endpoint_name = endpoint_name
-
-        endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name
-        deployment_name = args.deployment_name if args.deployment_name else "default"
-
-        endpoint, deployment = create_endpoint_and_deployment(
-            ml_client=ml_client,
-            endpoint_name=endpoint_name,
-            deployment_name=deployment_name,
-            model_id=model_id,
-            args=args
-        )
-
-        response = None
-        if args.inference_payload or args.inference_payload_str:
-            print("Invoking inference with test payload ...")
+    args = parse_args()
+    ml_client = get_mlclient()
+    # get registered model id
+
+    if args.model_id:
+        model_id = str(args.model_id)
+    elif args.registration_details_folder:
+        registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE
+        if registration_details_file.exists():
             try:
-                start_time = time.time()
-                if args.inference_payload_str:
-                    print(f"Inference payload string: {args.inference_payload_str}")
-                    decoded_bytes = base64.b64decode(args.inference_payload_str)
-
-                    # Convert bytes to string
-                    decoded_str = decoded_bytes.decode('utf-8')
-                    logger.info(f"Decoded string: {decoded_str}")
-
-                    payload = json.loads(decoded_str)
-                    logger.info(f"Payload:\n {payload}")
-
-                    with open("payload.json", "w") as temp_file:
-                        json.dump(payload, temp_file)
-
-                    response = ml_client.online_endpoints.invoke(
-                        endpoint_name=endpoint_name,
-                        deployment_name=deployment_name,
-                        request_file="payload.json",
-                    )
-                elif args.inference_payload:
-                    response = ml_client.online_endpoints.invoke(
-                        endpoint_name=endpoint_name,
-                        deployment_name=deployment_name,
-                        request_file=args.inference_payload,
-                    )
-
-                end_time = time.time()
-                inference_time_ms = int((end_time - start_time) * 1000)
-
-                logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " +
-                            f"and response: {response}")
-                # Save inference response
-                if args.model_inference_response:
-                    inference_result = {
-                        "response": response,
-                        "inference_time": inference_time_ms
-                    }
-                    with open(args.model_inference_response, "w") as f:
-                        json.dump(inference_result, f, indent=4)
-                    logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}")
+                with open(registration_details_file) as f:
+                    model_info = json.load(f)
+                model_id = model_info["id"]
             except Exception as e:
-                raise AzureMLException._with_error(
-                    AzureMLError.create(OnlineEndpointInvocationError, exception=e)
-                )
-
-        print("Saving deployment details ...")
-
-        # write deployment details to file
-        endpoint_type = "aml_online_inference"
-        deployment_details = {
-            "endpoint_name": endpoint.name,
-            "deployment_name": deployment.name,
-            "endpoint_uri": endpoint.__dict__["_scoring_uri"],
-            "endpoint_type": endpoint_type,
-            "instance_type": args.instance_type,
-            "instance_count": args.instance_count,
-            "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance,
-        }
-        json_object = json.dumps(deployment_details, indent=4)
-        with open(args.model_deployment_details, "w") as outfile:
-            outfile.write(json_object)
-        logger.info("Saved deployment details in output json file.")
+                raise Exception(f"model_registration_details json file is missing model information {e}.")
+        else:
+            raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.")
+    else:
+        raise Exception("Arguments model_id and registration_details both are missing.")
 
-    except Exception as e:
-        # Capture the full traceback
-        stack_trace = traceback.format_exc()
-        error_message = f"Model deployment failed.\n{stack_trace}"
-        logger.error(f"error_message from stack trace: {error_message}, deploy_error_path: {args.deploy_error}")
+    # Endpoint has following restrictions:
+    # 1. Name must begin with lowercase letter
+    # 2. Followed by lowercase letters, hyphen or numbers
+    # 3. End with a lowercase letter or number
 
-        # Write the error message to the specified error output file
-        if args.deploy_error:
-            with open(args.deploy_error, "w") as error_file:
-                error_file.write(error_message)
+    # 1. Replace underscores and slashes by hyphens and convert them to lower case.
+    # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it
+    model_name = get_model_name(model_id)
 
-        # Re-raise the exception with the full traceback
-        # raise Exception(error_message)
+    endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21]
+    endpoint_name = f"{endpoint_name}-{int(time.time())}"
+    endpoint_name = endpoint_name
 
+    endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name
+    deployment_name = args.deployment_name if args.deployment_name else "default"
+
+    endpoint, deployment = create_endpoint_and_deployment(
+        ml_client=ml_client,
+        endpoint_name=endpoint_name,
+        deployment_name=deployment_name,
+        model_id=model_id,
+        args=args
+    )
+
+    if args.inference_payload:
+        print("Invoking inference with test payload ...")
+        try:
+            response = ml_client.online_endpoints.invoke(
+                endpoint_name=endpoint_name,
+                deployment_name=deployment_name,
+                request_file=args.inference_payload,
+            )
+            print(f"Response:\n{response}")
+            logger.info(f"Endpoint invoked successfully with response :{response}")
+        except Exception as e:
+            raise AzureMLException._with_error(
+                AzureMLError.create(OnlineEndpointInvocationError, exception=e)
+            )
 
+    print("Saving deployment details ...")
+
+    # write deployment details to file
+    endpoint_type = "aml_online_inference"
+    deployment_details = {
+        "endpoint_name": endpoint.name,
+        "deployment_name": deployment.name,
+        "endpoint_uri": endpoint.__dict__["_scoring_uri"],
+        "endpoint_type": endpoint_type,
+        "instance_type": args.instance_type,
+        "instance_count": args.instance_count,
+        "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance,
+    }
+    json_object = json.dumps(deployment_details, indent=4)
+    with open(args.model_deployment_details, "w") as outfile:
+        outfile.write(json_object)
+    logger.info("Saved deployment details in output json file.")
+
+
+# run script
 if __name__ == "__main__":
     # run main function
-    main()
+    main()
\ No newline at end of file

From 992f9499bc70b6e13b62a8a534493c57254ec2a9 Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Mon, 19 May 2025 12:14:33 +0000
Subject: [PATCH 28/29] reverted deploy_model component

---
 .../common/components/deploy_model/spec.yaml  | 225 ------------------
 1 file changed, 225 deletions(-)
 delete mode 100644 assets/common/components/deploy_model/spec.yaml

diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml
deleted file mode 100644
index f4678e2426..0000000000
--- a/assets/common/components/deploy_model/spec.yaml
+++ /dev/null
@@ -1,225 +0,0 @@
-$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
-name: deploy_model
-version: 0.0.12.29
-type: command
-
-is_deterministic: True
-
-display_name: Deploy model
-description:
-  Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached.
-
-environment: azureml://registries/azureml/environments/python-sdk-v2/versions/29
-
-code: ../../src
-command: >-
-  python deploy.py
-  $[[--registration_details_folder ${{inputs.registration_details_folder}}]]
-  $[[--model_id ${{inputs.model_id}}]]
-  $[[--inference_payload ${{inputs.inference_payload}}]]
-  $[[--inference_payload_str ${{inputs.inference_payload_str}}]]
-  $[[--endpoint_name ${{inputs.endpoint_name}}]]
-  $[[--deployment_name ${{inputs.deployment_name}}]]
-  $[[--instance_type ${{inputs.instance_type}}]]
-  $[[--instance_count ${{inputs.instance_count}}]]
-  $[[--max_concurrent_requests_per_instance ${{inputs.max_concurrent_requests_per_instance}}]] 
-  $[[--request_timeout_ms ${{inputs.request_timeout_ms}}]]
-  $[[--max_queue_wait_ms ${{inputs.max_queue_wait_ms}}]]
-  $[[--failure_threshold_readiness_probe ${{inputs.failure_threshold_readiness_probe}}]]
-  $[[--success_threshold_readiness_probe ${{inputs.success_threshold_readiness_probe}}]]
-  $[[--timeout_readiness_probe ${{inputs.timeout_readiness_probe}}]]
-  $[[--period_readiness_probe ${{inputs.period_readiness_probe}}]]
-  $[[--initial_delay_readiness_probe ${{inputs.initial_delay_readiness_probe}}]]
-  $[[--failure_threshold_liveness_probe ${{inputs.failure_threshold_liveness_probe}}]]
-  $[[--timeout_liveness_probe ${{inputs.timeout_liveness_probe}}]]
-  $[[--period_liveness_probe ${{inputs.period_liveness_probe}}]]
-  $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]]
-  $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]]
-  --model_deployment_details ${{outputs.model_deployment_details}}
-  --model_inference_response ${{outputs.model_inference_response}}
-  --deploy_error ${{outputs.deploy_error}}
-
-inputs:
-  # Output of registering component
-  registration_details_folder:
-    type: uri_folder
-    optional: true
-    description: Folder containing model registration details in a JSON file named model_registration_details.json
-
-  model_id:
-    type: string
-    optional: true 
-    description: |
-      Asset ID of the model registered in workspace/registry.
-      Registry - azureml://registries/<registry-name>/models/<model-name>/versions/<version>
-      Workspace - azureml:<model-name>:<version>
-
-  inference_payload:
-    type: uri_file
-    optional: true
-    description: JSON payload which would be used to validate deployment
-
-  inference_payload_str:
-    type: string
-    optional: true
-    description: Serialized JSON payload which would be used to validate deployment
-
-  endpoint_name:
-    type: string
-    optional: true
-    description: Name of the endpoint
-
-  deployment_name:
-    type: string
-    optional: true
-    default: default
-    description: Name of the deployment
-  
-  instance_type:
-    type: string
-    optional: true
-    enum:
-      - Standard_DS1_v2
-      - Standard_DS2_v2
-      - Standard_DS3_v2
-      - Standard_DS4_v2
-      - Standard_DS5_v2
-      - Standard_F2s_v2
-      - Standard_F4s_v2
-      - Standard_F8s_v2
-      - Standard_F16s_v2
-      - Standard_F32s_v2
-      - Standard_F48s_v2
-      - Standard_F64s_v2
-      - Standard_F72s_v2
-      - Standard_FX24mds
-      - Standard_FX36mds
-      - Standard_FX48mds
-      - Standard_E2s_v3
-      - Standard_E4s_v3
-      - Standard_E8s_v3
-      - Standard_E16s_v3
-      - Standard_E32s_v3
-      - Standard_E48s_v3
-      - Standard_E64s_v3
-      - Standard_NC4as_T4_v3
-      - Standard_NC6s_v2
-      - Standard_NC6s_v3
-      - Standard_NC8as_T4_v3
-      - Standard_NC12s_v2
-      - Standard_NC12s_v3
-      - Standard_NC16as_T4_v3
-      - Standard_NC24s_v2
-      - Standard_NC24s_v3
-      - Standard_NC24rs_v3
-      - Standard_NC24ads_A100_v4
-      - Standard_NC48ads_A100_v4
-      - Standard_NC96ads_A100_v4
-      - Standard_NC64as_T4_v3
-      - Standard_ND40rs_v2
-      - Standard_ND96asr_v4
-      - Standard_ND96amsr_A100_v4
-    default: Standard_NC24s_v3
-    description: Compute instance type to deploy model. Make sure that instance type is available and have enough quota available.
-
-  instance_count:
-    type: integer
-    optional: true
-    default: 1
-    description: Number of instances you want to use for deployment. Make sure instance type have enough quota available.
-
-  max_concurrent_requests_per_instance:
-    type: integer
-    default: 1
-    optional: true
-    description: Maximum concurrent requests to be handled per instance
-
-  request_timeout_ms:
-    type: integer
-    default: 60000
-    optional: true
-    description: Request timeout in ms. Max limit is 90000.
-
-  max_queue_wait_ms:
-    type: integer
-    default: 60000
-    optional: true
-    description: Maximum queue wait time of a request in ms
-  
-  failure_threshold_readiness_probe:
-    type: integer
-    default: 10
-    optional: true 
-    description: The number of times system will try after failing the readiness probe
-
-  success_threshold_readiness_probe:
-    type: integer
-    default: 1
-    optional: true 
-    description: The minimum consecutive successes for the readiness probe to be considered successful after having failed
-  
-  timeout_readiness_probe:
-    type: integer
-    default: 10
-    optional: true
-    description: The number of seconds after which the readiness probe times out
-
-  period_readiness_probe:
-    type: integer
-    default: 10
-    optional: true
-    description: How often (in seconds) to perform the readiness probe
-
-  initial_delay_readiness_probe:
-    type: integer
-    default: 10
-    optional: true
-    description: The number of seconds after the container has started before the readiness probe is initiated
-
-  failure_threshold_liveness_probe:
-    type: integer
-    default: 30
-    optional: true 
-    description: The number of times system will try after failing the liveness probe
-  
-  timeout_liveness_probe:
-    type: integer
-    default: 10
-    optional: true
-    description: The number of seconds after which the liveness probe times out
-
-  period_liveness_probe:
-    type: integer
-    default: 10
-    optional: true 
-    description:  How often (in seconds) to perform the liveness probe
-
-  initial_delay_liveness_probe:
-    type: integer
-    default: 10
-    optional: true
-    description: The number of seconds after the container has started before the liveness probe is initiated
-  
-  egress_public_network_access:
-    type: string
-    default: enabled
-    optional: true 
-    enum:
-      - enabled
-      - disabled
-    description: Setting it to disabled secures the deployment by restricting communication between the deployment and the Azure resources used by it
-
-outputs:
-  model_deployment_details:
-    type: uri_file
-    description: Json file to which deployment details will be written
-  model_inference_response:
-    type: uri_file
-    description: JSON file containing inference results
-  deploy_error:
-    type: uri_file
-    description: File containing error messages or stack traces from the validation step.
-
-tags:
-    Preview: ""
-    Internal: ""

From b1c61c892900b9be33a1bc97daccd13be0d172de Mon Sep 17 00:00:00 2001
From: abhishekMS2024 <abhisheku@microsoft.com>
Date: Thu, 22 May 2025 09:37:17 +0000
Subject: [PATCH 29/29] MaaP self-serve validation AML pipeline: custom model
 support

---
 .../components/deploy_inference_model/spec.yaml    |  9 +++++++++
 assets/common/src/deploy_inference_model.py        | 14 +++++++++++++-
 .../publish_validation_results_selfserve/spec.yaml |  4 ++--
 .../components/run_inference_validation/spec.yaml  |  4 ++--
 .../components/validate_model_inference/spec.yaml  |  9 +++++++++
 5 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/assets/common/components/deploy_inference_model/spec.yaml b/assets/common/components/deploy_inference_model/spec.yaml
index efae05fea4..407c0a2416 100644
--- a/assets/common/components/deploy_inference_model/spec.yaml
+++ b/assets/common/components/deploy_inference_model/spec.yaml
@@ -16,6 +16,7 @@ command: >-
   python deploy_inference_model.py
   $[[--registration_details_folder ${{inputs.registration_details_folder}}]]
   $[[--model_id ${{inputs.model_id}}]]
+  $[[--environment_id ${{inputs.environment_id}}]]
   $[[--inference_payload ${{inputs.inference_payload}}]]
   $[[--inference_payload_str ${{inputs.inference_payload_str}}]]
   $[[--endpoint_name ${{inputs.endpoint_name}}]]
@@ -54,6 +55,14 @@ inputs:
       Registry - azureml://registries/<registry-name>/models/<model-name>/versions/<version>
       Workspace - azureml:<model-name>:<version>
 
+  environment_id:
+    type: string
+    optional: true
+    description: |
+      Asset ID of the environment registered in workspace/registry.
+      Registry - azureml://registries/<registry-name>/environments/<environment-name>/versions/<version>
+      Workspace - azureml:<environment-name>:<version>
+
   inference_payload:
     type: uri_file
     optional: true
diff --git a/assets/common/src/deploy_inference_model.py b/assets/common/src/deploy_inference_model.py
index 7230258fe5..0fa2f68be7 100644
--- a/assets/common/src/deploy_inference_model.py
+++ b/assets/common/src/deploy_inference_model.py
@@ -57,6 +57,12 @@ def parse_args():
         type=str,
         help="Registered mlflow model id",
     )
+    parser.add_argument(
+        "--environment_id",
+        type=str,
+        required=False,
+        help="AzureML environment ID to use for deployment",
+    )
     parser.add_argument(
         "--inference_payload",
         type=Path,
@@ -195,7 +201,7 @@ def parse_args():
     return args
 
 
-def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args):
+def create_endpoint_and_deployment(ml_client, model_id, environment_id, endpoint_name, deployment_name, args):
     """Create endpoint and deployment and return details."""
     endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token")
 
@@ -204,6 +210,7 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen
         name=deployment_name,
         endpoint_name=endpoint_name,
         model=model_id,
+        environment=environment_id,
         instance_type=args.instance_type,
         instance_count=args.instance_count,
         request_settings=OnlineRequestSettings(
@@ -278,6 +285,7 @@ def main():
         args = parse_args()
         logger.info(f"Arguments: {args}")
         ml_client = get_mlclient()
+
         error_message = ""
         if args.model_deployment_details:
             with open(args.model_deployment_details, "w") as outfile:
@@ -291,6 +299,9 @@ def main():
             with open(args.deploy_error, "w") as error_file:
                 error_file.write(error_message)
 
+        # environment is optional: treat a missing or empty --environment_id as None
+        environment_id = getattr(args, "environment_id", None) or None
+
         # get registered model id
         if args.model_id:
             model_id = str(args.model_id)
@@ -329,6 +340,7 @@ def main():
             endpoint_name=endpoint_name,
             deployment_name=deployment_name,
             model_id=model_id,
+            environment_id=environment_id,
             args=args
         )
 
diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
index 498fde2dc1..db5ab152ff 100644
--- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
+++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml
@@ -3,12 +3,12 @@ type: command
 is_deterministic: true
 
 name: publish_validation_results_selfserve
-version: 0.0.1.23
+version: 0.0.1
 display_name: Publish model validation results to Self-Serve
 description: |
   This component publishes model validation results to the Self-Serve database.
 
-environment: azureml://registries/azureml/environments/model-management/versions/41
+environment: azureml://registries/azureml/environments/model-management/versions/47
 
 inputs:
   selfserve_base_url:
diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml
index c5e9f5cfb0..3feba65c60 100644
--- a/assets/training/model_management/components/run_inference_validation/spec.yaml
+++ b/assets/training/model_management/components/run_inference_validation/spec.yaml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: run_inference_validation
-version: 0.0.1.66
+version: 0.0.1
 type: command
 
 is_deterministic: True
@@ -8,7 +8,7 @@ is_deterministic: True
 display_name: Run Inference Validation
 description: Compares the expected inference response with the actual response from model deployment.
 
-environment: azureml://registries/azureml/environments/model-management/versions/41
+environment: azureml://registries/azureml/environments/model-management/versions/47
 
 code: ../../src
 command: >-
diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml
index b754f283e3..49524c34c8 100644
--- a/assets/training/model_management/components/validate_model_inference/spec.yaml
+++ b/assets/training/model_management/components/validate_model_inference/spec.yaml
@@ -74,6 +74,14 @@ inputs:
       Registry - azureml://registries/<registry-name>/models/<model-name>/versions/<version>
       Workspace - azureml:<model-name>:<version>
 
+  environment_id:
+    type: string
+    optional: false
+    description: |
+      Asset ID of the environment registered in workspace/registry.
+      Registry - azureml://registries/<registry-name>/environments/<environment-name>/versions/<version>
+      Workspace - azureml:<environment-name>:<version>
+
   model_name:
     type: string
     optional: false
@@ -140,6 +148,7 @@ jobs:
     compute: ${{parent.inputs.compute}}
     inputs:
       model_id: ${{parent.inputs.model_id}}
+      environment_id: ${{parent.inputs.environment_id}}
       inference_payload_str: ${{parent.inputs.inference_payload}}
       endpoint_name: ${{parent.inputs.endpoint_name}}
       deployment_name: ${{parent.inputs.deployment_name}}