From 21f4d2bbe9eab11cb1cbb09de34a1a71ab48814c Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Tue, 25 Mar 2025 11:00:38 +0530 Subject: [PATCH 01/29] AML pipeline component for MaaP self serve validation --- .../publish_result_selfserve/asset.yaml | 11 + .../publish_result_selfserve/spec.yaml | 51 ++++ .../run_inference_validation/asset.yaml | 3 + .../run_inference_validation/spec.yaml | 45 ++++ .../validate_model_inference/asset.yaml | 3 + .../validate_model_inference/spec.yaml | 180 ++++++++++++++ .../src/azureml/model/mgmt/config.py | 1 + .../publish_validation_results_selfserve.py | 175 +++++++++++++ .../src/run_inference_validation.py | 232 ++++++++++++++++++ 9 files changed, 701 insertions(+) create mode 100644 assets/training/model_management/components/publish_result_selfserve/asset.yaml create mode 100644 assets/training/model_management/components/publish_result_selfserve/spec.yaml create mode 100644 assets/training/model_management/components/run_inference_validation/asset.yaml create mode 100644 assets/training/model_management/components/run_inference_validation/spec.yaml create mode 100644 assets/training/model_management/components/validate_model_inference/asset.yaml create mode 100644 assets/training/model_management/components/validate_model_inference/spec.yaml create mode 100644 assets/training/model_management/src/publish_validation_results_selfserve.py create mode 100644 assets/training/model_management/src/run_inference_validation.py diff --git a/assets/training/model_management/components/publish_result_selfserve/asset.yaml b/assets/training/model_management/components/publish_result_selfserve/asset.yaml new file mode 100644 index 0000000000..9d4136ecd3 --- /dev/null +++ b/assets/training/model_management/components/publish_result_selfserve/asset.yaml @@ -0,0 +1,11 @@ +type: component +spec: spec.yaml +categories: + [ + "CommonBench Baselining", + "Benchmarking", + "Run Benchmark", + "Publish Results", + "Self-Serve API", + "API Inferencing" + ] diff --git a/assets/training/model_management/components/publish_result_selfserve/spec.yaml b/assets/training/model_management/components/publish_result_selfserve/spec.yaml new file mode 100644 index 0000000000..23ad9f9d40 --- /dev/null +++ b/assets/training/model_management/components/publish_result_selfserve/spec.yaml @@ -0,0 +1,51 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json +type: command +is_deterministic: true + +name: publish_validation_results_selfserve +version: 0.0.1 +display_name: Publish model validation results to Self-Serve +description: | + This component publishes model validation results to the Self-Serve database. + +environment: azureml://registries/azureml/environments/model-management/versions/41 + +inputs: + selfserve_base_url: + type: string + optional: false + default: "https://int.api.azureml-test.ms" + description: Base URL of the model publisher self-serve API + model_name: + type: string + optional: false + description: Name of the model (e.g., VerboGenie) + model_version: + type: integer + optional: false + description: Model onboarding version (e.g., 5) + publisher_name: + type: string + optional: false + description: Name of the model publisher (e.g., ContosoAI) + sku: + type: string + optional: false + default: "Standard_NC24ads_A100_v4" + description: Suggested SKU based on benchmark results + metrics_storage_uri: + type: uri_file + optional: false + mode: ro_mount + description: Path to the file containing the validation metrics csv storage path + +code: ../../src + +command: >- + python publish_validation_results_selfserve.py + --selfserve-base-url ${{inputs.selfserve_base_url}} + --model-name ${{inputs.model_name}} + --model-version ${{inputs.model_version}} + --publisher-name ${{inputs.publisher_name}} + --sku ${{inputs.sku}} + --metrics-storage-uri ${{inputs.metrics_storage_uri}} \ No newline at end of file diff --git a/assets/training/model_management/components/run_inference_validation/asset.yaml b/assets/training/model_management/components/run_inference_validation/asset.yaml new file mode 100644 index 0000000000..c01772d398 --- /dev/null +++ b/assets/training/model_management/components/run_inference_validation/asset.yaml @@ -0,0 +1,3 @@ +type: component +spec: spec.yaml +categories: ["Model"] diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml new file mode 100644 index 0000000000..91c38775a6 --- /dev/null +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -0,0 +1,45 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json +name: run_inference_validation +version: 0.0.1 +type: command + +is_deterministic: True + +display_name: Run Inference Validation +description: Compares the expected inference response with the actual response from model deployment. + +environment: azureml://registries/azureml/environments/model-management/versions/41 + +code: ../../src +command: >- + python run_inference_validation.py + --inference_payload ${{inputs.inference_payload}} + --expected_response ${{inputs.expected_response}} + --inference_response ${{inputs.inference_response}} + --validation_result ${{outputs.validation_result}} + --metrics_storage_uri ${{outputs.metrics_storage_uri}} + +inputs: + inference_payload: + type: uri_file + description: JSON input payload used for inference. + + expected_response: + type: uri_file + description: JSON file containing the expected inference response. + + inference_response: + type: uri_file + description: JSON file containing the actual inference response from the deployed model. + +outputs: + validation_result: + type: uri_file + description: JSON file containing the validation results. + metrics_storage_uri: + type: uri_file + description: JSON file containing the validation metrics csv storage path + +tags: + Preview: "" + Internal: "" diff --git a/assets/training/model_management/components/validate_model_inference/asset.yaml b/assets/training/model_management/components/validate_model_inference/asset.yaml new file mode 100644 index 0000000000..c01772d398 --- /dev/null +++ b/assets/training/model_management/components/validate_model_inference/asset.yaml @@ -0,0 +1,3 @@ +type: component +spec: spec.yaml +categories: ["Model"] diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml new file mode 100644 index 0000000000..e82eab4ab2 --- /dev/null +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -0,0 +1,180 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json +type: pipeline + +name: validate_model_inference +display_name: Validate Model Inference +description: deploy a model and validate it using a sample payload +version: 0.0.1 + +experiment_name: validate_model + +inputs: + compute: + type: string + optional: true + default: serverless + description: Compute for model deployment and inferencing + + instance_type: + type: string + optional: true + enum: + - Standard_DS1_v2 + - Standard_DS2_v2 + - Standard_DS3_v2 + - Standard_DS4_v2 + - Standard_DS5_v2 + - Standard_F2s_v2 + - Standard_F4s_v2 + - Standard_F8s_v2 + - Standard_F16s_v2 + - Standard_F32s_v2 + - Standard_F48s_v2 + - Standard_F64s_v2 + - Standard_F72s_v2 + - Standard_FX24mds + - Standard_FX36mds + - Standard_FX48mds + - Standard_E2s_v3 + - Standard_E4s_v3 + - Standard_E8s_v3 + - Standard_E16s_v3 + - Standard_E32s_v3 + - Standard_E48s_v3 + - Standard_E64s_v3 + - Standard_NC4as_T4_v3 + - Standard_NC6s_v2 + - Standard_NC6s_v3 + - Standard_NC8as_T4_v3 + - Standard_NC12s_v2 + - Standard_NC12s_v3 + - Standard_NC16as_T4_v3 + - Standard_NC24s_v2 + - Standard_NC24s_v3 + - Standard_NC24rs_v3 + - Standard_NC64as_T4_v3 + - Standard_ND40rs_v2 + - Standard_ND96asr_v4 + - Standard_ND96amsr_A100_v4 + default: Standard_NC6s_v3 + description: Compute instance type to deploy model. Make sure that instance type is available and have enough quota available. + + instance_count: + type: integer + optional: true + default: 1 + description: Number of instances you want to use for deployment. Make sure instance type have enough quota available. + + model_id: + type: string + optional: true + description: | + Asset ID of the model registered in workspace/registry. + Registry - azureml://registries//models//versions/ + Workspace - azureml:: + + model_name: + type: string + optional: false + description: Name of the model to validate. + + model_version: + type: integer + optional: false + description: Model onboarding version (e.g., 5) + + publisher_name: + type: string + optional: false + description: Name of the model publisher (e.g., ContosoAI) + + selfserve_base_url: + type: string + optional: true + default: "https://int.api.azureml-test.ms" + description: Base URL of the model publisher self-serve API + + sku: + type: string + optional: true + default: "Standard_NC24ads_A100_v4" + description: SKU of the deployed model endpoint. + + inference_payload: + type: uri_file + optional: true + description: JSON payload which would be used to validate deployment + + endpoint_name: + type: string + optional: true + description: Name of the endpoint + + deployment_name: + type: string + optional: true + default: default + description: Name of the deployment + + expected_inference_response: + type: uri_file + description: JSON file containing the expected inference response. + +# Pipeline outputs +outputs: + validation_result: + description: Output file containing the validation results. + type: uri_file + +jobs: + online_deployment_model: + type: command + component: azureml:/deploy_model:0.0.12 + compute: ${{parent.inputs.compute}} + inputs: + model_id: ${{parent.inputs.model_id}} + inference_payload: ${{parent.inputs.inference_payload}} + endpoint_name: ${{parent.inputs.endpoint_name}} + deployment_name: ${{parent.inputs.deployment_name}} + instance_type: ${{parent.inputs.instance_type}} + instance_count: ${{parent.inputs.instance_count}} + identity: + type: user_identity + outputs: + model_deployment_details: + type: uri_file + inference_response: + type: uri_file + + run_inference_validation: + type: command + component: azureml:run_inference_validation:0.0.1 + identity: + type: user_identity + inputs: + inference_payload: ${{parent.inputs.inference_payload}} + expected_response: ${{parent.inputs.expected_inference_response}} + inference_response: ${{parent.jobs.online_deployment_model.outputs.inference_response}} + outputs: + validation_result: ${{parent.outputs.validation_result}} + + delete_endpoints: + type: command + component: azureml:delete_endpoint:0.0.7 + inputs: + model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} + endpoint_name: ${{parent.inputs.endpoint_name}} + identity: + type: user_identity + compute: ${{parent.inputs.compute}} + + publish_results: + type: command + component: azureml:publish_validation_results_selfserve:0.0.1 + inputs: + publisher_name: ${{parent.inputs.publisher_name}} + model_name: ${{parent.inputs.model_name}} + model_version: ${{parent.inputs.model_version}} + sku: ${{parent.inputs.instance_type}} + selfserve_base_url: ${{parent.inputs.selfserve_base_url}} + metrics_storage_uri: ${{parent.jobs.run_inference_validation.outputs.metrics_storage_uri}} diff --git a/assets/training/model_management/src/azureml/model/mgmt/config.py b/assets/training/model_management/src/azureml/model/mgmt/config.py index 2cd6b5f5d4..7de73aa0fe 100644 --- a/assets/training/model_management/src/azureml/model/mgmt/config.py +++ b/assets/training/model_management/src/azureml/model/mgmt/config.py @@ -47,6 +47,7 @@ class AppName: DOWNLOAD_MODEL = "download_model" CONVERT_MODEL_TO_MLFLOW = "convert_model_to_mlflow" VALIDATION_TRIGGER_IMPORT = "validation_trigger_import" + RUN_INFERENCE_VALIDATION = "run_inference_validation" class LoggerConfig: diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py new file mode 100644 index 0000000000..5b92e3bdff --- /dev/null +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -0,0 +1,175 @@ +"""Update model onboarding version with CommonBench results.""" + +import sys +import os +import uuid +import json +import logging +import requests +import argparse +from datetime import datetime, timezone +from azure.identity import ManagedIdentityCredential +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential +from datetime import datetime + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def read_results_from_file(file_path): + """Read the metrics results from the given file path.""" + try: + with open(file_path, 'r') as f: + results_dict = json.load(f) + print(f"Results loaded from {file_path}") + return results_dict + except Exception as e: + print(f"Error reading from file: {e}") + return None + + +def update_model_onboarding_version( + publisher_name, + model_name, + model_version, + selfserve_base_url, + sku, + metrics_storage_uri +): + """Update model onboarding version with benchmark results.""" + current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + is_obo = False + try: + credential = AzureMLOnBehalfOfCredential() + token = credential.get_token( + "https://management.azure.com/.default").token + is_obo = True + except Exception as ex: + logger.warning(f"Failed to get OBO credentials - {ex}") + + if not is_obo: + try: + logger.info("Fetching MSI credential") + msi_client_id = os.environ.get("DEFAULT_IDENTITY_CLIENT_ID") + credential = ManagedIdentityCredential(client_id=msi_client_id) + token = credential.get_token( + "https://management.azure.com/.default").token + except Exception as ex: + raise (f"Failed to get MSI credentials : {ex}") + + metrics_path_dict = read_results_from_file(metrics_storage_uri) + + run_id = str(uuid.uuid4()) + + validation_result = [] + + if metrics_path_dict.get("perf_bench_path") is not None: + validation_result.append({ + "runId": run_id, + "type": "PERF_BENCHMARK", + "passed": True, + "message": "Baseline data is captured successfully", + "validationResultUrl": metrics_path_dict.get("perf_bench_path"), + "createdTime": current_time, + "status": "success", + "sku": sku + }) + + if metrics_path_dict.get("api_validation_path") is not None: + validation_result.append({ + "runId": run_id, + "type": "API_VALIDATION", + "passed": True, + "message": "API validation passed successfully", + "validationResultUrl": metrics_path_dict.get("api_validation_path"), + "status": "success", + "createdTime": current_time, + "sku": sku + }) + + if metrics_path_dict.get("api_inference_path") is not None: + validation_result.append({ + "runId": run_id, + "type": "API_VALIDATION", + "passed": True, + "message": "API inference passed successfully", + "validationResultUrl": metrics_path_dict.get("api_inference_path"), + "status": "success", + "createdTime": current_time, + "sku": sku + }) + + payload = { + "suggestedSKU": sku, + "status": "Validation", + "subStatus": "Validation_Successful", + "validationResult": validation_result + } + + api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31" + + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "User-Agent": "AzureML-ModelPublishing/1.0" + } + + try: + logger.info(f"Sending request to {api_url} \n, headers: {headers} \n, payload: {payload}") + + response = requests.put(api_url, headers=headers, json=payload) + + logger.info(f"Response: {response.json()}") + + if response.ok: + logger.info( + f"Successfully updated model onboarding version. Response: {response.status_code}") + return {"status_code": response.status_code} + else: + logger.error( + f"Failed to update model onboarding version. Status code: {response.status_code}") + logger.error(f"Response content: {response.json()}") + raise Exception( + f"Request failed with status code {response.status_code}: {response.json()}") + except requests.RequestException as e: + logging.error(f"Request failed: {e}") + raise + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Update model onboarding version with CommonBench validation results") + + parser.add_argument("--publisher-name", required=True, + help="Name of the model publisher (e.g., ContosoAI)") + parser.add_argument("--model-name", required=True, + help="Name of the model (e.g., VerboGenie)") + parser.add_argument("--model-version", required=True, + help="Model onboarding version (e.g., 5)") + parser.add_argument("--selfserve-base-url", required=True, + default="https://int.api.azureml-test.ms", + help="Base URL of the model publisher self-serve API") + parser.add_argument("--metrics-storage-uri", required=True, + help="URI to the storage where validation metrics are stored") + parser.add_argument("--sku", required=False, + default="Standard_NC24ads_A100_v4", + help="Suggested SKU based on benchmark results") + + args = parser.parse_args() + logger.info(f"Arguments: {args}") + + try: + result = update_model_onboarding_version( + args.publisher_name, + args.model_name, + args.model_version, + args.selfserve_base_url, + args.sku, + args.metrics_storage_uri + ) + logger.info("Model onboarding version update completed successfully") + except Exception as e: + logger.error(f"Failed to update model onboarding version: {e}") + sys.exit(1) diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py new file mode 100644 index 0000000000..9129a19ade --- /dev/null +++ b/assets/training/model_management/src/run_inference_validation.py @@ -0,0 +1,232 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Validate the structure of expected and actual inference response JSON files.""" + +import json +import argparse +import os +from azureml.core import Run +from azureml.model.mgmt.utils.common_utils import get_mlclient +from azureml.model.mgmt.config import AppName +from azureml.model.mgmt.utils.logging_utils import custom_dimensions, get_logger + + +logger = get_logger(__name__) +custom_dimensions.app_name = AppName.RUN_INFERENCE_VALIDATION + + +def load_json(file_path): + """Load JSON data from a file. If the loaded data is a string, try to parse it as JSON.""" + try: + with open(file_path, "r") as f: + data = json.load(f) + # If data is a string, parse it as JSON. + if isinstance(data, str): + try: + data = json.loads(data) + except Exception as e: + logger.warning(f"Error parsing JSON from string in {file_path}: {e}") + return data + except Exception as e: + logger.warning(f"Error loading JSON file {file_path}: {e}") + return None + + +def get_json_structure(data): + """ + Recursively extract the structure of JSON (keys only). + + For dictionaries, returns a dict of keys mapped to their structure. + For lists, returns a list with the structure of the first element. + For other types, returns None. + """ + if isinstance(data, dict): + return {key: get_json_structure(value) for key, value in data.items()} + elif isinstance(data, list) and len(data) > 0: + # Assume all elements share the same structure and return the structure of the first element. + return [get_json_structure(data[0])] + else: + return None + + +def compare_structures(inference_payload, expected_response, inference_response): + """ + Compare JSON structures (keys only) of expected and actual. + + Returns a dictionary with validation results. + """ + expected_structure = get_json_structure(expected_response) + actual_structure = get_json_structure(inference_response) + logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}") + + result = { + "inference_payload": inference_payload, + "inference_output": inference_response, + "structure_match": expected_structure == actual_structure, + "expected_structure": expected_structure, + "actual_structure": actual_structure, + "differences": [] + } + + if not result["structure_match"]: + result["differences"] = [ + {"expected": expected_structure, "actual": actual_structure} + ] + logger.info(f"result: {result}") + return result + + +def save_validation_result(result, output_path): + """Save validation results to a JSON file.""" + try: + with open(output_path, "w") as f: + json.dump(result, f, indent=4) + logger.info(f"Validation result saved to {output_path}") + except Exception as e: + logger.error(f"Error saving validation result: {e}") + +def replace_name_in_path(path_template, name_value): + """Replace the placeholder in the output path with the actual job name.""" + return path_template.replace('${{name}}', name_value) + +def fetch_storage_uri(): + """Return the storage URI of the output file from the AzureML pipeline run.""" + try: + run = Run.get_context() + run_details = run.get_details() + output_data_path = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']['path'] + + output_data_uri = replace_name_in_path(output_data_path, run.id) + + logger.info(f"Output data URI: {output_data_uri}, output_data_path: {output_data_path}") + + # Extract datastore name and path from the AzureML URI + datastore_name, path = extract_datastore_info(output_data_uri) + + # Construct the storage URI + storage_uri = get_storage_url(datastore_name) + full_storage_uri = f"{storage_uri}/{path}" + logger.info(f"Full storage URI: {full_storage_uri}") + + return full_storage_uri + except Exception as e: + logger.error(f"Error fetching storage URI: {e}") + return None + + +def store_metrics_paths(metrics_file_path): + """Store the paths of the metrics CSV files in a JSON file.""" + base_path = fetch_storage_uri() + + logger.info(f"validation_result_path: {base_path}") + result_dict = {} + result_dict['api_inference_path'] = base_path + if result_dict: + write_results_to_file(result_dict, metrics_file_path) + + +def fetch_path(output_dir): + """Return the relative path of the data from the output directory.""" + try: + # Calculate relative path from the job folder + rel_path = os.path.relpath(output_dir, os.getcwd()) + logger.info(f"rel_path: {rel_path}") + result_dict = { + 'api_inference_path': rel_path + } + return result_dict + except Exception as e: + logger.error(f"Error calculating relative path: {e}") + return {} + + +def write_results_to_file(results_dict, file_path): + """Write the results dictionary to a JSON file.""" + try: + with open(file_path, 'w') as f: + json.dump(results_dict, f, indent=4) + logger.info(f"Results written to {file_path} in JSON format") + return True + except Exception as e: + logger.error(f"Error writing to file: {e}") + return False + + +def get_storage_url(datastore_name): + """Retrieve the storage URL for the specified datastore.""" + # Get MLClient instance + ml_client = get_mlclient() + logger.info(f"ml_client: {ml_client}") + datastore = ml_client.datastores.get(datastore_name) + storage_account_name = datastore.account_name + container_name = datastore.container_name + endpoint = datastore.endpoint + + storage_uri = f"https://{storage_account_name}.blob.{endpoint}/{container_name}" + logger.info(f"storage_uri: {storage_uri}") + + return storage_uri + + +def extract_datastore_info(datastore_uri_path): + """Extract both datastore name and path from an Azure ML datastore URI path.""" + # Check if it's a valid datastore URI + if not datastore_uri_path.startswith('azureml://datastores/'): + return None, None + + parts = datastore_uri_path.split('/') + + # The datastore name should be the part after 'datastores/' + if len(parts) >= 5 and parts[0] == 'azureml:' and parts[1] == '' and parts[2] == 'datastores' and 'paths' in parts: + datastore_name = parts[3] + + # Find the index of 'paths' in the URI + paths_index = parts.index('paths') + + # Join everything after 'paths/' to form the path + path = '/'.join(parts[(paths_index + 1):]) + + return datastore_name, path + + return None, None + + +def main(): + """Compare expected and actual inference response structures.""" + parser = argparse.ArgumentParser() + parser.add_argument("--inference_payload", type=str, required=True, + help="Path to the expected inference response JSON file.") + parser.add_argument("--expected_response", type=str, required=True, + help="Path to the expected inference response JSON file.") + parser.add_argument("--inference_response", type=str, required=True, + help="Path to the actual inference response JSON file.") + parser.add_argument("--validation_result", type=str, required=True, + help="Path to save validation results.") + parser.add_argument("--metrics_storage_uri", type=str, required=True, + help="Path to store the metrics.") + + args = parser.parse_args() + + # Load expected and actual responses. + inference_payload = load_json(args.inference_payload) + expected_response = load_json(args.expected_response) + inference_response = load_json(args.inference_response) + logger.info(f"expected response: {expected_response}, actual response: {inference_response}") + + if expected_response is None or inference_response is None: + logger.warning("One or both JSON files could not be loaded.") + return + + # Compare the JSON structures. + validation_result = compare_structures(inference_payload, expected_response, inference_response) + + # Save the validation result. + save_validation_result(validation_result, args.validation_result) + logger.info(f"validation_result: {validation_result}, Validation result saved to {args.validation_result}") + + store_metrics_paths(args.metrics_storage_uri) + + +if __name__ == "__main__": + main() From eecdceab45d9f37ddb98520ae7002beca7339590 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 28 Mar 2025 12:07:06 +0530 Subject: [PATCH 02/29] AML pipeline update to run the validation and publish result for maap self serve --- .../components/delete_endpoint/spec.yaml | 2 +- .../common/components/deploy_model/spec.yaml | 8 +++-- assets/common/src/deploy.py | 16 +++++++++ .../run_inference_validation/spec.yaml | 3 +- .../validate_model_inference/spec.yaml | 3 +- .../publish_validation_results_selfserve.py | 15 ++++---- .../src/run_inference_validation.py | 34 +++++++++++-------- 7 files changed, 55 insertions(+), 26 deletions(-) diff --git a/assets/common/components/delete_endpoint/spec.yaml b/assets/common/components/delete_endpoint/spec.yaml index 20d0f24c34..9e55975a1f 100644 --- a/assets/common/components/delete_endpoint/spec.yaml +++ b/assets/common/components/delete_endpoint/spec.yaml @@ -9,7 +9,7 @@ display_name: Delete Endpoint description: Deletes an endpoint resource. -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19 +environment: azureml://registries/azureml/environments/python-sdk-v2/versions/28 code: ../../src command: >- diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index db1670adeb..5add6652a6 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.11 +version: 0.0.12 type: command is_deterministic: True @@ -9,7 +9,7 @@ display_name: Deploy model description: Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19 +environment: azureml://registries/azureml/environments/python-sdk-v2/versions/28 code: ../../src command: >- @@ -35,6 +35,7 @@ command: >- $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]] $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]] --model_deployment_details ${{outputs.model_deployment_details}} + --inference_response ${{outputs.inference_response}} inputs: # Output of registering component @@ -202,6 +203,9 @@ outputs: model_deployment_details: type: uri_file description: Json file to which deployment details will be written + inference_response: + type: uri_file + description: JSON file containing inference results tags: Preview: "" diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index a4afa64493..8920ba1792 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -162,6 +162,11 @@ def parse_args(): type=str, help="Json file to which deployment details will be written", ) + parser.add_argument( + "--inference_response", + type=str, + help="Json file to save the inference response", + ) # parse args args = parser.parse_args() logger.info(f"Args received {args}") @@ -301,6 +306,7 @@ def main(): args=args ) + inference_result = None if args.inference_payload: print("Invoking inference with test payload ...") try: @@ -311,6 +317,12 @@ def main(): ) print(f"Response:\n{response}") logger.info(f"Endpoint invoked successfully with response :{response}") + # Save inference response + inference_result = response + if args.inference_response: + with open(args.inference_response, "w") as f: + json.dump(inference_result, f, indent=4) + logger.info("Saved inference response to output JSON file.") except Exception as e: raise AzureMLException._with_error( AzureMLError.create(OnlineEndpointInvocationError, exception=e) @@ -334,6 +346,10 @@ def main(): outfile.write(json_object) logger.info("Saved deployment details in output json file.") + if response and args.inference_response: + with open(args.inference_response, "w") as outfile: + outfile.write(response) + logger.info("Saved inference response in output json file.") # run script if __name__ == "__main__": diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index 91c38775a6..4859e8d763 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -14,7 +14,7 @@ code: ../../src command: >- python run_inference_validation.py --inference_payload ${{inputs.inference_payload}} - --expected_response ${{inputs.expected_response}} + $[[--expected_response ${{inputs.expected_response}}]] --inference_response ${{inputs.inference_response}} --validation_result ${{outputs.validation_result}} --metrics_storage_uri ${{outputs.metrics_storage_uri}} @@ -26,6 +26,7 @@ inputs: expected_response: type: uri_file + optional: true description: JSON file containing the expected inference response. inference_response: diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index e82eab4ab2..ebf3150e53 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -118,6 +118,7 @@ inputs: expected_inference_response: type: uri_file + optional: true description: JSON file containing the expected inference response. # Pipeline outputs @@ -129,7 +130,7 @@ outputs: jobs: online_deployment_model: type: command - component: azureml:/deploy_model:0.0.12 + component: azureml:deploy_model:0.0.12 compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index 5b92e3bdff..cbdfe14621 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -4,17 +4,18 @@ import os import uuid import json -import logging import requests import argparse from datetime import datetime, timezone from azure.identity import ManagedIdentityCredential from azure.ai.ml.identity import AzureMLOnBehalfOfCredential from datetime import datetime +from azureml.model.mgmt.config import AppName +from azureml.model.mgmt.utils.logging_utils import custom_dimensions, get_logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +logger = get_logger(__name__) +custom_dimensions.app_name = AppName.PUBHLISH_VALIDATION_RESULTS_SELF_SERVE def read_results_from_file(file_path): @@ -121,7 +122,7 @@ def update_model_onboarding_version( response = requests.put(api_url, headers=headers, json=payload) - logger.info(f"Response: {response.json()}") + logger.info(f"Response: {response.text}") if response.ok: logger.info( @@ -130,11 +131,11 @@ def update_model_onboarding_version( else: logger.error( f"Failed to update model onboarding version. Status code: {response.status_code}") - logger.error(f"Response content: {response.json()}") + logger.error(f"Response content: {response.text}") raise Exception( - f"Request failed with status code {response.status_code}: {response.json()}") + f"Request failed with status code {response.status_code}: {response.text}") except requests.RequestException as e: - logging.error(f"Request failed: {e}") + logger.error(f"Request failed: {e}") raise diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 9129a19ade..0edbb65841 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -73,7 +73,7 @@ def compare_structures(inference_payload, expected_response, inference_response) result["differences"] = [ {"expected": expected_structure, "actual": actual_structure} ] - logger.info(f"result: {result}") + logger.info(f"validation result: {result}") return result @@ -99,8 +99,6 @@ def fetch_storage_uri(): output_data_uri = replace_name_in_path(output_data_path, run.id) - logger.info(f"Output data URI: {output_data_uri}, output_data_path: {output_data_path}") - # Extract datastore name and path from the AzureML URI datastore_name, path = extract_datastore_info(output_data_uri) @@ -131,7 +129,7 @@ def fetch_path(output_dir): try: # Calculate relative path from the job folder rel_path = os.path.relpath(output_dir, os.getcwd()) - logger.info(f"rel_path: {rel_path}") + logger.info(f"api inference validation relative path: {rel_path}") result_dict = { 'api_inference_path': rel_path } @@ -157,14 +155,13 @@ def get_storage_url(datastore_name): """Retrieve the storage URL for the specified datastore.""" # Get MLClient instance ml_client = get_mlclient() - logger.info(f"ml_client: {ml_client}") datastore = ml_client.datastores.get(datastore_name) storage_account_name = datastore.account_name container_name = datastore.container_name endpoint = datastore.endpoint storage_uri = f"https://{storage_account_name}.blob.{endpoint}/{container_name}" - logger.info(f"storage_uri: {storage_uri}") + logger.info(f"validation result storage: {storage_uri}") return storage_uri @@ -197,7 +194,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--inference_payload", type=str, required=True, help="Path to the expected inference response JSON file.") - parser.add_argument("--expected_response", type=str, required=True, + parser.add_argument("--expected_response", type=str, required=False, help="Path to the expected inference response JSON file.") parser.add_argument("--inference_response", type=str, required=True, help="Path to the actual inference response JSON file.") @@ -210,16 +207,25 @@ def main(): # Load expected and actual responses. inference_payload = load_json(args.inference_payload) - expected_response = load_json(args.expected_response) inference_response = load_json(args.inference_response) + if args.expected_response is None: + expected_response = load_json(args.expected_response) + else: + expected_response = None logger.info(f"expected response: {expected_response}, actual response: {inference_response}") - if expected_response is None or inference_response is None: - logger.warning("One or both JSON files could not be loaded.") - return - - # Compare the JSON structures. - validation_result = compare_structures(inference_payload, expected_response, inference_response) + if expected_response: + validation_result = compare_structures(inference_payload, expected_response, inference_response) + else: + validation_result = { + "inference_payload": inference_payload, + "inference_output": inference_response, + "structure_match": None, + "expected_structure": None, + "actual_structure": get_json_structure(inference_response), + "differences": [] + } + logger.info("No expected response provided. Skipping structure comparison.") # Save the validation result. save_validation_result(validation_result, args.validation_result) From d577a6d6dcd08efeed4d6799b2a03b19925a88e8 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 31 Mar 2025 10:44:03 +0530 Subject: [PATCH 03/29] Updated MaaP selfserve validation pipeline component --- .../training/model_management/src/azureml/model/mgmt/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/assets/training/model_management/src/azureml/model/mgmt/config.py b/assets/training/model_management/src/azureml/model/mgmt/config.py index 7de73aa0fe..114e1dcc03 100644 --- a/assets/training/model_management/src/azureml/model/mgmt/config.py +++ b/assets/training/model_management/src/azureml/model/mgmt/config.py @@ -48,6 +48,7 @@ class AppName: CONVERT_MODEL_TO_MLFLOW = "convert_model_to_mlflow" VALIDATION_TRIGGER_IMPORT = "validation_trigger_import" RUN_INFERENCE_VALIDATION = "run_inference_validation" + PUBHLISH_VALIDATION_RESULTS_SELF_SERVE = "publish_validation_results_self_serve" class LoggerConfig: From 7faef922a24a7d30e323e3a4c1046eda446a0351 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 31 Mar 2025 13:53:06 +0530 Subject: [PATCH 04/29] Updated the deploy aml component --- assets/common/src/deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 8920ba1792..753d4e9141 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -165,7 +165,7 @@ def parse_args(): parser.add_argument( "--inference_response", type=str, - help="Json file to save the inference response", + help="Path to the inference response JSON file.", ) # parse args args = parser.parse_args() From ba4e70f4af4b0b2aa52d7bcc94726a156e335983 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 31 Mar 2025 13:59:37 +0530 Subject: [PATCH 05/29] renamed publish_result component --- .../asset.yaml | 0 .../spec.yaml | 0 .../src/publish_validation_results_selfserve.py | 3 +++ 3 files changed, 3 insertions(+) rename assets/training/model_management/components/{publish_result_selfserve => publish_validation_results_selfserve}/asset.yaml (100%) rename assets/training/model_management/components/{publish_result_selfserve => publish_validation_results_selfserve}/spec.yaml (100%) diff --git a/assets/training/model_management/components/publish_result_selfserve/asset.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/asset.yaml similarity index 100% rename from assets/training/model_management/components/publish_result_selfserve/asset.yaml rename to assets/training/model_management/components/publish_validation_results_selfserve/asset.yaml diff --git a/assets/training/model_management/components/publish_result_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml similarity index 100% rename from assets/training/model_management/components/publish_result_selfserve/spec.yaml rename to assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index cbdfe14621..a0d7290df5 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + """Update model onboarding version with CommonBench results.""" import sys From d544040276ef092433ff92e0a772b10efbb6e6de Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 31 Mar 2025 20:40:27 +0530 Subject: [PATCH 06/29] updated the publish result component --- .../spec.yaml | 1 + .../publish_validation_results_selfserve.py | 125 ++++++++++-------- 2 files changed, 68 insertions(+), 58 deletions(-) diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml index 23ad9f9d40..85079a786b 100644 --- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml +++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml @@ -47,5 +47,6 @@ command: >- --model-name ${{inputs.model_name}} --model-version ${{inputs.model_version}} --publisher-name ${{inputs.publisher_name}} + --validation-id ${{inputs.validation_id}} --sku ${{inputs.sku}} --metrics-storage-uri ${{inputs.metrics_storage_uri}} \ No newline at end of file diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index a0d7290df5..16c87350b8 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -32,78 +32,87 @@ def read_results_from_file(file_path): print(f"Error reading from file: {e}") return None - -def update_model_onboarding_version( - publisher_name, - model_name, - model_version, - selfserve_base_url, - sku, - metrics_storage_uri -): - """Update model onboarding version with benchmark results.""" - current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - +def get_auth_token(): + """Generate auth token for Azure API.""" is_obo = False + tokenUri = "https://management.azure.com/.default" + token = None + try: credential = AzureMLOnBehalfOfCredential() - token = credential.get_token( - "https://management.azure.com/.default").token + token = credential.get_token(tokenUri).token is_obo = True - except Exception as ex: - logger.warning(f"Failed to get OBO credentials - {ex}") + except Exception: + logger.warning( + "Failed to get user credentials, fetching MSI credentials") if not is_obo: try: - logger.info("Fetching MSI credential") msi_client_id = os.environ.get("DEFAULT_IDENTITY_CLIENT_ID") credential = ManagedIdentityCredential(client_id=msi_client_id) - token = credential.get_token( - "https://management.azure.com/.default").token + token = credential.get_token(tokenUri).token except Exception as ex: - raise (f"Failed to get MSI credentials : {ex}") + raise Exception(f"Failed to get MSI credentials : {ex}") - metrics_path_dict = read_results_from_file(metrics_storage_uri) + return token - run_id = str(uuid.uuid4()) + +def update_model_onboarding_version( + publisher_name, + model_name, + model_version, + sku, + validation_id, + selfserve_base_url, + metrics_storage_uri +): + """Update model onboarding version with benchmark results.""" + current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + metrics_path_dict = read_results_from_file(metrics_storage_uri) validation_result = [] - if metrics_path_dict.get("perf_bench_path") is not None: - validation_result.append({ - "runId": run_id, - "type": "PERF_BENCHMARK", - "passed": True, - "message": "Baseline data is captured successfully", - "validationResultUrl": metrics_path_dict.get("perf_bench_path"), - "createdTime": current_time, - "status": "success", - "sku": sku - }) - - if metrics_path_dict.get("api_validation_path") is not None: - validation_result.append({ - "runId": run_id, - "type": "API_VALIDATION", - "passed": True, - "message": "API validation passed successfully", - "validationResultUrl": metrics_path_dict.get("api_validation_path"), - "status": "success", - "createdTime": current_time, - "sku": sku - }) - - if metrics_path_dict.get("api_inference_path") is not None: - validation_result.append({ - "runId": run_id, - "type": "API_VALIDATION", - "passed": True, - "message": "API inference passed successfully", - "validationResultUrl": metrics_path_dict.get("api_inference_path"), - "status": "success", - "createdTime": current_time, - "sku": sku - }) + if validation_id is not None: + if metrics_path_dict.get("perf_bench_path") is not None: + validation_result.append({ + "Id": validation_id, + "type": "PERF_BENCHMARK", + "passed": True, + "message": "Baseline data is captured successfully", + "validationResultUrl": metrics_path_dict.get("perf_bench_path"), + "createdTime": current_time, + "status": "success", + "sku": sku + }) + + if metrics_path_dict.get("api_validation_path") is not None: + validation_result.append({ + "Id": validation_id, + "type": "API_VALIDATION", + "passed": True, + "message": "API validation passed successfully", + "validationResultUrl": metrics_path_dict.get("api_validation_path"), + "status": "success", + "createdTime": current_time, + "sku": sku + }) + + if metrics_path_dict.get("api_inference_path") is not None: + validation_result.append({ + "Id": validation_id, + "type": "API_VALIDATION", + "passed": True, + "message": "API inference passed successfully", + "validationResultUrl": metrics_path_dict.get("api_inference_path"), + "status": "success", + "createdTime": current_time, + "sku": sku + }) + else: + logger.error( + "Validation run ID is None, not updating validation results in self-serve") + sys.exit(1) payload = { "suggestedSKU": sku, @@ -115,7 +124,7 @@ def update_model_onboarding_version( api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31" headers = { - "Authorization": f"Bearer {token}", + "Authorization": f"Bearer {get_auth_token()}", "Content-Type": "application/json", "User-Agent": "AzureML-ModelPublishing/1.0" } From b0743b592d8b49b49078a61487b4bb5cc52e3693 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Tue, 1 Apr 2025 19:19:24 +0530 Subject: [PATCH 07/29] Updated publish validation result --- .../spec.yaml | 4 ++ .../validate_model_inference/spec.yaml | 6 ++ .../publish_validation_results_selfserve.py | 59 +++++-------------- 3 files changed, 24 insertions(+), 45 deletions(-) diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml index 85079a786b..531eb689a0 100644 --- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml +++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml @@ -33,6 +33,10 @@ inputs: optional: false default: "Standard_NC24ads_A100_v4" description: Suggested SKU based on benchmark results + validation_id: + type: string + optional: false + description: ID of the validation run (used for updating status in self-serve) metrics_storage_uri: type: uri_file optional: false diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index ebf3150e53..e461840015 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -116,6 +116,11 @@ inputs: default: default description: Name of the deployment + validation_id: + type: string + optional: true + description: ID of the validation run (used for updating status in self-serve) + expected_inference_response: type: uri_file optional: true @@ -177,5 +182,6 @@ jobs: model_name: ${{parent.inputs.model_name}} model_version: ${{parent.inputs.model_version}} sku: ${{parent.inputs.instance_type}} + validation_id: ${{parent.inputs.validation_id}} selfserve_base_url: ${{parent.inputs.selfserve_base_url}} metrics_storage_uri: ${{parent.jobs.run_inference_validation.outputs.metrics_storage_uri}} diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index 16c87350b8..c749b80705 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -12,7 +12,6 @@ from datetime import datetime, timezone from azure.identity import ManagedIdentityCredential from azure.ai.ml.identity import AzureMLOnBehalfOfCredential -from datetime import datetime from azureml.model.mgmt.config import AppName from azureml.model.mgmt.utils.logging_utils import custom_dimensions, get_logger @@ -71,57 +70,24 @@ def update_model_onboarding_version( metrics_path_dict = read_results_from_file(metrics_storage_uri) - validation_result = [] - - if validation_id is not None: - if metrics_path_dict.get("perf_bench_path") is not None: - validation_result.append({ - "Id": validation_id, - "type": "PERF_BENCHMARK", - "passed": True, - "message": "Baseline data is captured successfully", - "validationResultUrl": metrics_path_dict.get("perf_bench_path"), - "createdTime": current_time, - "status": "success", - "sku": sku - }) - - if metrics_path_dict.get("api_validation_path") is not None: - validation_result.append({ - "Id": validation_id, - "type": "API_VALIDATION", - "passed": True, - "message": "API validation passed successfully", - "validationResultUrl": metrics_path_dict.get("api_validation_path"), - "status": "success", - "createdTime": current_time, - "sku": sku - }) - - if metrics_path_dict.get("api_inference_path") is not None: - validation_result.append({ - "Id": validation_id, - "type": "API_VALIDATION", - "passed": True, - "message": "API inference passed successfully", - "validationResultUrl": metrics_path_dict.get("api_inference_path"), - "status": "success", - "createdTime": current_time, - "sku": sku - }) + validationResultUrl = None + + if validation_id: + if metrics_path_dict.get("api_inference_path"): + validationResultUrl = metrics_path_dict.get("api_inference_path") else: logger.error( "Validation run ID is None, not updating validation results in self-serve") sys.exit(1) payload = { - "suggestedSKU": sku, - "status": "Validation", - "subStatus": "Validation_Successful", - "validationResult": validation_result + "passed": True, + "status": "Completed", + "message": "Validation Successful", + "validationResult": validationResultUrl } - api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31" + api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/versions/{model_version}/validations/{validation_id}/updateValidationResult?api-version=2024-12-31" headers = { "Authorization": f"Bearer {get_auth_token()}", @@ -164,6 +130,8 @@ def update_model_onboarding_version( parser.add_argument("--selfserve-base-url", required=True, default="https://int.api.azureml-test.ms", help="Base URL of the model publisher self-serve API") + parser.add_argument("--validation-id", required=True, + help="Run ID of the validation run") parser.add_argument("--metrics-storage-uri", required=True, help="URI to the storage where validation metrics are stored") parser.add_argument("--sku", required=False, @@ -178,8 +146,9 @@ def update_model_onboarding_version( args.publisher_name, args.model_name, args.model_version, - args.selfserve_base_url, args.sku, + args.validation_id, + args.selfserve_base_url, args.metrics_storage_uri ) logger.info("Model onboarding version update completed successfully") From aebe16a8519d680895ee9c65f2e35fdb12cef83e Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Wed, 2 Apr 2025 14:22:18 +0530 Subject: [PATCH 08/29] Updated validation result publish API --- .../publish_validation_results_selfserve.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index c749b80705..adad260b9a 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -70,24 +70,33 @@ def update_model_onboarding_version( metrics_path_dict = read_results_from_file(metrics_storage_uri) - validationResultUrl = None + validation_result = [] if validation_id: - if metrics_path_dict.get("api_inference_path"): - validationResultUrl = metrics_path_dict.get("api_inference_path") + if metrics_path_dict.get("api_inference_path", None): + validation_result.append({ + "Id": validation_id, + "type": "API_VALIDATION", + "passed": True, + "message": "API inference passed successfully", + "validationResultUrl": metrics_path_dict.get("api_inference_path"), + "status": "success", + "createdTime": current_time, + "sku": sku + }) else: logger.error( - "Validation run ID is None, not updating validation results in self-serve") + "Validation ID is None, not updating validation results in self-serve") sys.exit(1) payload = { "passed": True, "status": "Completed", "message": "Validation Successful", - "validationResult": validationResultUrl + "validationResult": validation_result } - api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/versions/{model_version}/validations/{validation_id}/updateValidationResult?api-version=2024-12-31" + api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31" headers = { "Authorization": f"Bearer {get_auth_token()}", From 380ca49e88a466d7367dd0ac351540f5ebc32684 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Tue, 8 Apr 2025 12:43:26 +0530 Subject: [PATCH 09/29] Updated the AML pipeline validation component --- .../common/components/deploy_model/spec.yaml | 8 ++- assets/common/src/deploy.py | 55 ++++++++++++---- .../run_inference_validation/spec.yaml | 4 +- .../validate_model_inference/spec.yaml | 14 ++--- .../publish_validation_results_selfserve.py | 2 + .../src/run_inference_validation.py | 62 +++++++++++++------ 6 files changed, 104 insertions(+), 41 deletions(-) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index 5add6652a6..eea9f29b4a 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.12 +version: 0.0.12.10 type: command is_deterministic: True @@ -17,6 +17,7 @@ command: >- $[[--registration_details_folder ${{inputs.registration_details_folder}}]] $[[--model_id ${{inputs.model_id}}]] $[[--inference_payload ${{inputs.inference_payload}}]] + $[[--inference_payload_str ${{inputs.inference_payload_str}}]] $[[--endpoint_name ${{inputs.endpoint_name}}]] $[[--deployment_name ${{inputs.deployment_name}}]] $[[--instance_type ${{inputs.instance_type}}]] @@ -57,6 +58,11 @@ inputs: optional: true description: JSON payload which would be used to validate deployment + inference_payload_str: + type: string + optional: true + description: Serialized JSON payload which would be used to validate deployment + endpoint_name: type: string optional: true diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 753d4e9141..65a097ae34 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -6,6 +6,7 @@ import json import re import time +import base64 from azure.ai.ml.entities import ( ManagedOnlineEndpoint, @@ -60,6 +61,11 @@ def parse_args(): type=Path, help="Json file with inference endpoint payload.", ) + parser.add_argument( + "--inference_payload_str", + type=str, + help="Serialized JSON payload for inference.", + ) parser.add_argument( "--endpoint_name", type=str, @@ -306,23 +312,50 @@ def main(): args=args ) - inference_result = None - if args.inference_payload: + response = None + if args.inference_payload or args.inference_payload_str: print("Invoking inference with test payload ...") try: - response = ml_client.online_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - request_file=args.inference_payload, - ) - print(f"Response:\n{response}") - logger.info(f"Endpoint invoked successfully with response :{response}") + start_time = time.time() + if args.inference_payload_str: + print(f"Inference payload string: {args.inference_payload_str}") + decoded_bytes = base64.b64decode(args.inference_payload_str) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + + payload = json.loads(decoded_str) + logger.info(f"Payload:\n {payload}") + + with open("payload.json", "w") as temp_file: + json.dump(payload, temp_file) + + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file="payload.json", + ) + elif args.inference_payload: + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file=args.inference_payload, + ) + + end_time = time.time() + inference_time_ms = int((end_time - start_time) * 1000) + + logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms and response: {response}") # Save inference response - inference_result = response if args.inference_response: + inference_result = { + "response": response, + "inference_time": inference_time_ms + } with open(args.inference_response, "w") as f: json.dump(inference_result, f, indent=4) - logger.info("Saved inference response to output JSON file.") + logger.info("Saved inference response and inference time to output JSON file.") except Exception as e: raise AzureMLException._with_error( AzureMLError.create(OnlineEndpointInvocationError, exception=e) diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index 4859e8d763..adf4cd3c49 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: run_inference_validation -version: 0.0.1 +version: 0.0.1.17 type: command is_deterministic: True @@ -21,7 +21,7 @@ command: >- inputs: inference_payload: - type: uri_file + type: string description: JSON input payload used for inference. expected_response: diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index e461840015..8423b3ce28 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -4,7 +4,7 @@ type: pipeline name: validate_model_inference display_name: Validate Model Inference description: deploy a model and validate it using a sample payload -version: 0.0.1 +version: 0.0.1.17 experiment_name: validate_model @@ -101,7 +101,7 @@ inputs: description: SKU of the deployed model endpoint. inference_payload: - type: uri_file + type: string optional: true description: JSON payload which would be used to validate deployment @@ -135,11 +135,11 @@ outputs: jobs: online_deployment_model: type: command - component: azureml:deploy_model:0.0.12 + component: component: azureml:deploy_model:0.0.12.9 compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} - inference_payload: ${{parent.inputs.inference_payload}} + inference_payload_str: ${{parent.inputs.inference_payload}} endpoint_name: ${{parent.inputs.endpoint_name}} deployment_name: ${{parent.inputs.deployment_name}} instance_type: ${{parent.inputs.instance_type}} @@ -154,7 +154,7 @@ jobs: run_inference_validation: type: command - component: azureml:run_inference_validation:0.0.1 + component: component: azureml:run_inference_validation:0.0.1.17 identity: type: user_identity inputs: @@ -166,7 +166,7 @@ jobs: delete_endpoints: type: command - component: azureml:delete_endpoint:0.0.7 + component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1 inputs: model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} endpoint_name: ${{parent.inputs.endpoint_name}} @@ -176,7 +176,7 @@ jobs: publish_results: type: command - component: azureml:publish_validation_results_selfserve:0.0.1 + component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.8 inputs: publisher_name: ${{parent.inputs.publisher_name}} model_name: ${{parent.inputs.model_name}} diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index adad260b9a..b37c11b0e0 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -76,12 +76,14 @@ def update_model_onboarding_version( if metrics_path_dict.get("api_inference_path", None): validation_result.append({ "Id": validation_id, + "runId": validation_id, "type": "API_VALIDATION", "passed": True, "message": "API inference passed successfully", "validationResultUrl": metrics_path_dict.get("api_inference_path"), "status": "success", "createdTime": current_time, + "updatedTime": current_time, "sku": sku }) else: diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 0edbb65841..0c5f624985 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -33,6 +33,16 @@ def load_json(file_path): return None +def load_json_from_string(json_string): + """Load JSON data from a string.""" + try: + data = json.loads(json_string) + return data + except Exception as e: + logger.warning(f"Error parsing JSON from string: {e}") + return None + + def get_json_structure(data): """ Recursively extract the structure of JSON (keys only). @@ -50,7 +60,7 @@ def get_json_structure(data): return None -def compare_structures(inference_payload, expected_response, inference_response): +def compare_structures(inference_payload, expected_response, inference_response, success_status, inference_time): """ Compare JSON structures (keys only) of expected and actual. @@ -61,12 +71,13 @@ def compare_structures(inference_payload, expected_response, inference_response) logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}") result = { - "inference_payload": inference_payload, - "inference_output": inference_response, - "structure_match": expected_structure == actual_structure, - "expected_structure": expected_structure, - "actual_structure": actual_structure, - "differences": [] + "success": success_status, + "inference_time" : inference_time, + "sample_request": inference_payload, + "sample_response": expected_response, + "actual_response": inference_response, + "structure_match": expected_structure == actual_structure if expected_response else None, + "structural_difference": [] } if not result["structure_match"]: @@ -193,7 +204,7 @@ def main(): """Compare expected and actual inference response structures.""" parser = argparse.ArgumentParser() parser.add_argument("--inference_payload", type=str, required=True, - help="Path to the expected inference response JSON file.") + help="Serialized JSON payload for inference") parser.add_argument("--expected_response", type=str, required=False, help="Path to the expected inference response JSON file.") parser.add_argument("--inference_response", type=str, required=True, @@ -206,24 +217,35 @@ def main(): args = parser.parse_args() # Load expected and actual responses. - inference_payload = load_json(args.inference_payload) - inference_response = load_json(args.inference_response) - if args.expected_response is None: - expected_response = load_json(args.expected_response) - else: - expected_response = None + inference_payload = load_json_from_string(args.inference_payload) + inference_output = load_json(args.inference_response) + + expected_response = load_json(args.expected_response) if args.expected_response else None logger.info(f"expected response: {expected_response}, actual response: {inference_response}") + inference_response = inference_output.get("response") + inference_time = inference_output.get("inference_time_ms", 0) # Default to 0 if not present + + # Infer success status based on the presence of a valid response + success_status = inference_response is not None and bool(inference_response) + if expected_response: - validation_result = compare_structures(inference_payload, expected_response, inference_response) + validation_result = validation_result = compare_structures( + inference_payload, + expected_response, + inference_response, + success_status, + inference_time + ) else: validation_result = { - "inference_payload": inference_payload, - "inference_output": inference_response, + "success": success_status, + "inference_time": inference_time, + "sample_request": inference_payload, + "sample_response": expected_response, + "actual_response": inference_response, "structure_match": None, - "expected_structure": None, - "actual_structure": get_json_structure(inference_response), - "differences": [] + "actual_structure": [] } logger.info("No expected response provided. Skipping structure comparison.") From 29aa0cf80eb03bab60e253f49bc532a3f84c6c6a Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Wed, 9 Apr 2025 13:39:16 +0000 Subject: [PATCH 10/29] maap self-serve validation --- .../common/components/deploy_model/spec.yaml | 2 +- assets/common/src/deploy.py | 7 +- .../run_inference_validation/spec.yaml | 15 ++- .../validate_model_inference/spec.yaml | 14 +-- .../src/run_inference_validation.py | 110 +++++++++++------- 5 files changed, 89 insertions(+), 59 deletions(-) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index eea9f29b4a..71483ad276 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.12.10 +version: 0.0.12 type: command is_deterministic: True diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 65a097ae34..53424150cc 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -355,7 +355,7 @@ def main(): } with open(args.inference_response, "w") as f: json.dump(inference_result, f, indent=4) - logger.info("Saved inference response and inference time to output JSON file.") + logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}") except Exception as e: raise AzureMLException._with_error( AzureMLError.create(OnlineEndpointInvocationError, exception=e) @@ -379,11 +379,6 @@ def main(): outfile.write(json_object) logger.info("Saved deployment details in output json file.") - if response and args.inference_response: - with open(args.inference_response, "w") as outfile: - outfile.write(response) - logger.info("Saved inference response in output json file.") - # run script if __name__ == "__main__": # run main function diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index adf4cd3c49..7054de1bd8 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: run_inference_validation -version: 0.0.1.17 +version: 0.0.1 type: command is_deterministic: True @@ -16,6 +16,8 @@ command: >- --inference_payload ${{inputs.inference_payload}} $[[--expected_response ${{inputs.expected_response}}]] --inference_response ${{inputs.inference_response}} + --validation-id ${{inputs.validation_id}} + --sku ${{inputs.sku}} --validation_result ${{outputs.validation_result}} --metrics_storage_uri ${{outputs.metrics_storage_uri}} @@ -33,6 +35,17 @@ inputs: type: uri_file description: JSON file containing the actual inference response from the deployed model. + sku: + type: string + optional: false + default: "Standard_NC24ads_A100_v4" + description: Suggested SKU based on benchmark results + + validation_id: + type: string + optional: false + description: ID of the validation run (used for updating status in self-serve) + outputs: validation_result: type: uri_file diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index 8423b3ce28..45b1bb8575 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -4,9 +4,7 @@ type: pipeline name: validate_model_inference display_name: Validate Model Inference description: deploy a model and validate it using a sample payload -version: 0.0.1.17 - -experiment_name: validate_model +version: 0.0.1 inputs: compute: @@ -135,7 +133,7 @@ outputs: jobs: online_deployment_model: type: command - component: component: azureml:deploy_model:0.0.12.9 + component: azureml:deploy_model:0.0.12 compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} @@ -154,10 +152,12 @@ jobs: run_inference_validation: type: command - component: component: azureml:run_inference_validation:0.0.1.17 + component: azureml:run_inference_validation:0.0.1 identity: type: user_identity inputs: + validation_id: ${{parent.inputs.validation_id}} + sku: ${{parent.inputs.instance_type}} inference_payload: ${{parent.inputs.inference_payload}} expected_response: ${{parent.inputs.expected_inference_response}} inference_response: ${{parent.jobs.online_deployment_model.outputs.inference_response}} @@ -166,7 +166,7 @@ jobs: delete_endpoints: type: command - component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1 + component: azureml:delete_endpoint:0.0.7 inputs: model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} endpoint_name: ${{parent.inputs.endpoint_name}} @@ -176,7 +176,7 @@ jobs: publish_results: type: command - component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.8 + component: azureml:publish_validation_results_selfserve:0.0.1 inputs: publisher_name: ${{parent.inputs.publisher_name}} model_name: ${{parent.inputs.model_name}} diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 0c5f624985..f25e4e2a72 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -3,9 +3,12 @@ """Validate the structure of expected and actual inference response JSON files.""" +import base64 import json import argparse import os +import sys +from datetime import datetime, timezone from azureml.core import Run from azureml.model.mgmt.utils.common_utils import get_mlclient from azureml.model.mgmt.config import AppName @@ -59,44 +62,53 @@ def get_json_structure(data): else: return None - -def compare_structures(inference_payload, expected_response, inference_response, success_status, inference_time): +def compare_structures(expected_response, actual_response): """ Compare JSON structures (keys only) of expected and actual. - Returns a dictionary with validation results. + Returns a dictionary with structural differences and a match flag. """ expected_structure = get_json_structure(expected_response) - actual_structure = get_json_structure(inference_response) + actual_structure = get_json_structure(actual_response) logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}") - result = { - "success": success_status, - "inference_time" : inference_time, - "sample_request": inference_payload, - "sample_response": expected_response, - "actual_response": inference_response, - "structure_match": expected_structure == actual_structure if expected_response else None, - "structural_difference": [] - } + structure_match = expected_structure == actual_structure if expected_response else None + structural_difference = [] - if not result["structure_match"]: - result["differences"] = [ + if not structure_match: + structural_difference = [ {"expected": expected_structure, "actual": actual_structure} ] - logger.info(f"validation result: {result}") - return result + logger.info(f"Structure match: {structure_match}, Structural differences: {structural_difference}") + return { + "structure_match": structure_match, + "structural_difference": structural_difference + } -def save_validation_result(result, output_path): + +def save_validation_result(request_details, output_path, validation_id, sku, status): """Save validation results to a JSON file.""" try: + current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + validation_result = { + "id": validation_id, + "runId": validation_id, + "sku": sku, + "createdTime": current_time, + "updatedTime": current_time, + "type": "MAAP_INFERENCING", + "status": status, + "requestDetails": request_details + } + with open(output_path, "w") as f: - json.dump(result, f, indent=4) + json.dump(validation_result, f, indent=4) logger.info(f"Validation result saved to {output_path}") except Exception as e: logger.error(f"Error saving validation result: {e}") + def replace_name_in_path(path_template, name_value): """Replace the placeholder in the output path with the actual job name.""" return path_template.replace('${{name}}', name_value) @@ -213,45 +225,55 @@ def main(): help="Path to save validation results.") parser.add_argument("--metrics_storage_uri", type=str, required=True, help="Path to store the metrics.") + parser.add_argument("--sku", required=False, + default="Standard_NC24ads_A100_v4", + help="Suggested SKU based on benchmark results") + parser.add_argument("--validation-id", required=True, + help="Run ID of the validation run") args = parser.parse_args() - # Load expected and actual responses. - inference_payload = load_json_from_string(args.inference_payload) + inference_payload = None + if args.inference_payload: + decoded_bytes = base64.b64decode(args.inference_payload) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + + inference_payload = json.loads(decoded_str) + + inference_output = load_json(args.inference_response) + if not inference_output: + logger.error("Inference output is missing or invalid.") + sys.exit(1) + inference_output = load_json(args.inference_response) expected_response = load_json(args.expected_response) if args.expected_response else None - logger.info(f"expected response: {expected_response}, actual response: {inference_response}") inference_response = inference_output.get("response") - inference_time = inference_output.get("inference_time_ms", 0) # Default to 0 if not present + inference_time = inference_output.get("inference_time", 0) + logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, actual response: {inference_response}") # Infer success status based on the presence of a valid response success_status = inference_response is not None and bool(inference_response) - + status = "Success" if success_status else "Failed" + + request_details = { + "inputRequest": inference_payload, + "inputResponse": expected_response, + "actualResponse": inference_response, + "responseTime": inference_time, + "structuralDiff": None, + } if expected_response: - validation_result = validation_result = compare_structures( - inference_payload, - expected_response, - inference_response, - success_status, - inference_time - ) - else: - validation_result = { - "success": success_status, - "inference_time": inference_time, - "sample_request": inference_payload, - "sample_response": expected_response, - "actual_response": inference_response, - "structure_match": None, - "actual_structure": [] - } - logger.info("No expected response provided. Skipping structure comparison.") + comparison_result = compare_structures(expected_response, inference_response) + request_details["structuralDiff"] = comparison_result.get("structural_difference", []) # Save the validation result. - save_validation_result(validation_result, args.validation_result) - logger.info(f"validation_result: {validation_result}, Validation result saved to {args.validation_result}") + save_validation_result(request_details, args.validation_result, args.validation_id, args.sku, status) + logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_result}") store_metrics_paths(args.metrics_storage_uri) From fbd837166e43fc8a866c65cedf91f1b87e77e25b Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Wed, 9 Apr 2025 19:39:10 +0530 Subject: [PATCH 11/29] maap-self-serve validation pipeline update --- assets/common/src/deploy.py | 2 +- .../src/publish_validation_results_selfserve.py | 2 +- .../model_management/src/run_inference_validation.py | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 53424150cc..6c42a3e93d 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -379,7 +379,7 @@ def main(): outfile.write(json_object) logger.info("Saved deployment details in output json file.") -# run script + if __name__ == "__main__": # run main function main() diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index b37c11b0e0..8e012e38d0 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -5,7 +5,6 @@ import sys import os -import uuid import json import requests import argparse @@ -31,6 +30,7 @@ def read_results_from_file(file_path): print(f"Error reading from file: {e}") return None + def get_auth_token(): """Generate auth token for Azure API.""" is_obo = False diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index f25e4e2a72..23d69ad338 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -62,6 +62,7 @@ def get_json_structure(data): else: return None + def compare_structures(expected_response, actual_response): """ Compare JSON structures (keys only) of expected and actual. @@ -113,23 +114,24 @@ def replace_name_in_path(path_template, name_value): """Replace the placeholder in the output path with the actual job name.""" return path_template.replace('${{name}}', name_value) + def fetch_storage_uri(): """Return the storage URI of the output file from the AzureML pipeline run.""" try: run = Run.get_context() run_details = run.get_details() output_data_path = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']['path'] - + output_data_uri = replace_name_in_path(output_data_path, run.id) # Extract datastore name and path from the AzureML URI datastore_name, path = extract_datastore_info(output_data_uri) - + # Construct the storage URI storage_uri = get_storage_url(datastore_name) full_storage_uri = f"{storage_uri}/{path}" logger.info(f"Full storage URI: {full_storage_uri}") - + return full_storage_uri except Exception as e: logger.error(f"Error fetching storage URI: {e}") From 1b020291a7b37dbee3bf075131b01f01bf484af4 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Wed, 9 Apr 2025 19:50:00 +0530 Subject: [PATCH 12/29] maap-self-serve fixed syntax issue --- assets/common/src/deploy.py | 3 ++- .../src/publish_validation_results_selfserve.py | 5 ++++- .../model_management/src/run_inference_validation.py | 6 ++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 6c42a3e93d..022f785997 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -346,7 +346,8 @@ def main(): end_time = time.time() inference_time_ms = int((end_time - start_time) * 1000) - logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms and response: {response}") + logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " + + f"and response: {response}") # Save inference response if args.inference_response: inference_result = { diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index 8e012e38d0..33e61f739b 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -98,7 +98,10 @@ def update_model_onboarding_version( "validationResult": validation_result } - api_url = f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31" + api_url = ( + f"{selfserve_base_url}/model-publisher-self-serve/publishers/{publisher_name}/models/{model_name}" + f"/model-onboarding-version/{model_version}/updateModelOnboardingVersion?api-version=2024-12-31" + ) headers = { "Authorization": f"Bearer {get_auth_token()}", diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 23d69ad338..96bf83ac05 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -120,7 +120,8 @@ def fetch_storage_uri(): try: run = Run.get_context() run_details = run.get_details() - output_data_path = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri']['path'] + output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri'] + output_data_path = output_data['path'] output_data_uri = replace_name_in_path(output_data_path, run.id) @@ -256,7 +257,8 @@ def main(): inference_response = inference_output.get("response") inference_time = inference_output.get("inference_time", 0) - logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, actual response: {inference_response}") + logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, " + f"actual response: {inference_response}") # Infer success status based on the presence of a valid response success_status = inference_response is not None and bool(inference_response) From 277bb0284f71ec6f5174e4e5e7e3e64c090397c5 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Thu, 10 Apr 2025 10:38:58 +0530 Subject: [PATCH 13/29] maap-self-serve fixed syntax issue --- .../training/model_management/src/run_inference_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 96bf83ac05..56cb1e4f69 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -268,7 +268,7 @@ def main(): "inputRequest": inference_payload, "inputResponse": expected_response, "actualResponse": inference_response, - "responseTime": inference_time, + "responseTimeMs": inference_time, "structuralDiff": None, } if expected_response: From 5c531946c950891cfaf2815e2ebe54df26ae6678 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Sun, 13 Apr 2025 22:27:33 +0530 Subject: [PATCH 14/29] Updated the environment used for deploy model component --- assets/common/components/deploy_model/spec.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index 71483ad276..7cf10a71ee 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.12 +version: 0.0.12.13 type: command is_deterministic: True @@ -9,7 +9,7 @@ display_name: Deploy model description: Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/28 +environment: azureml://registries/azureml/environments/python-sdk-v2/versions/29 code: ../../src command: >- From efa03c2b90d6550752c8d5706e85a272c1a12e1c Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Mon, 14 Apr 2025 03:37:41 +0000 Subject: [PATCH 15/29] Added inference response validation --- .../common/components/deploy_model/spec.yaml | 6 ++-- assets/common/src/deploy.py | 6 ++-- .../run_inference_validation/spec.yaml | 2 +- .../validate_model_inference/spec.yaml | 10 +++---- .../src/run_inference_validation.py | 28 +++++++++++++++---- 5 files changed, 34 insertions(+), 18 deletions(-) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index 7cf10a71ee..f97bb2f870 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.12.13 +version: 0.0.12 type: command is_deterministic: True @@ -36,7 +36,7 @@ command: >- $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]] $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]] --model_deployment_details ${{outputs.model_deployment_details}} - --inference_response ${{outputs.inference_response}} + --model_inference_response ${{outputs.model_inference_response}} inputs: # Output of registering component @@ -209,7 +209,7 @@ outputs: model_deployment_details: type: uri_file description: Json file to which deployment details will be written - inference_response: + model_inference_response: type: uri_file description: JSON file containing inference results diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 022f785997..a4896c4dd7 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -169,7 +169,7 @@ def parse_args(): help="Json file to which deployment details will be written", ) parser.add_argument( - "--inference_response", + "--model_inference_response", type=str, help="Path to the inference response JSON file.", ) @@ -349,12 +349,12 @@ def main(): logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " + f"and response: {response}") # Save inference response - if args.inference_response: + if args.model_inference_response: inference_result = { "response": response, "inference_time": inference_time_ms } - with open(args.inference_response, "w") as f: + with open(args.model_inference_response, "w") as f: json.dump(inference_result, f, indent=4) logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}") except Exception as e: diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index 7054de1bd8..3c599b548a 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -27,7 +27,7 @@ inputs: description: JSON input payload used for inference. expected_response: - type: uri_file + type: string optional: true description: JSON file containing the expected inference response. diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index 45b1bb8575..20234e5de5 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -119,8 +119,8 @@ inputs: optional: true description: ID of the validation run (used for updating status in self-serve) - expected_inference_response: - type: uri_file + inference_response: + type: string optional: true description: JSON file containing the expected inference response. @@ -147,7 +147,7 @@ jobs: outputs: model_deployment_details: type: uri_file - inference_response: + model_inference_response: type: uri_file run_inference_validation: @@ -159,8 +159,8 @@ jobs: validation_id: ${{parent.inputs.validation_id}} sku: ${{parent.inputs.instance_type}} inference_payload: ${{parent.inputs.inference_payload}} - expected_response: ${{parent.inputs.expected_inference_response}} - inference_response: ${{parent.jobs.online_deployment_model.outputs.inference_response}} + expected_response: ${{parent.inputs.inference_response}} + inference_response: ${{parent.jobs.online_deployment_model.outputs.model_inference_response}} outputs: validation_result: ${{parent.outputs.validation_result}} diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 56cb1e4f69..1209944ebe 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -94,7 +94,6 @@ def save_validation_result(request_details, output_path, validation_id, sku, sta current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") validation_result = { "id": validation_id, - "runId": validation_id, "sku": sku, "createdTime": current_time, "updatedTime": current_time, @@ -246,16 +245,32 @@ def main(): inference_payload = json.loads(decoded_str) + expected_response = None + if args.expected_response: + decoded_bytes = base64.b64decode(args.expected_response) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + expected_response = json.loads(decoded_str) + + inference_output = load_json(args.inference_response) if not inference_output: logger.error("Inference output is missing or invalid.") sys.exit(1) - inference_output = load_json(args.inference_response) + inference_response = inference_output.get("response") + if isinstance(inference_response, str): + try: + inference_response = json.loads(inference_response) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse actualResponse as JSON: {e}") - expected_response = load_json(args.expected_response) if args.expected_response else None + if inference_response is None: + logger.warning("Actual response is missing or invalid. Setting it to an empty structure.") + inference_response = {} - inference_response = inference_output.get("response") inference_time = inference_output.get("inference_time", 0) logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, " f"actual response: {inference_response}") @@ -265,10 +280,11 @@ def main(): status = "Success" if success_status else "Failed" request_details = { - "inputRequest": inference_payload, - "inputResponse": expected_response, + "providedRequest": inference_payload, + "providedResponse": expected_response, "actualResponse": inference_response, "responseTimeMs": inference_time, + "errorMessage": None, "structuralDiff": None, } if expected_response: From 62c659f77beba227942cabd43e310eeb5ac78909 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Mon, 14 Apr 2025 09:48:49 +0000 Subject: [PATCH 16/29] Updated the validation result file extension --- .../src/publish_validation_results_selfserve.py | 15 +++++++++++++-- .../src/run_inference_validation.py | 6 +++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index 33e61f739b..c327b48c1c 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -76,12 +76,23 @@ def update_model_onboarding_version( if metrics_path_dict.get("api_inference_path", None): validation_result.append({ "Id": validation_id, - "runId": validation_id, "type": "API_VALIDATION", "passed": True, "message": "API inference passed successfully", "validationResultUrl": metrics_path_dict.get("api_inference_path"), - "status": "success", + "status": "Completed", + "createdTime": current_time, + "updatedTime": current_time, + "sku": sku + }) + else: + validation_result.append({ + "Id": validation_id, + "type": "API_VALIDATION", + "passed": True, + "message": "API inference passed successfully", + "validationResultUrl": metrics_path_dict.get("api_inference_path"), + "status": "Failed", "createdTime": current_time, "updatedTime": current_time, "sku": sku diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 1209944ebe..0deeb40504 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -91,6 +91,9 @@ def compare_structures(expected_response, actual_response): def save_validation_result(request_details, output_path, validation_id, sku, status): """Save validation results to a JSON file.""" try: + if not output_path.endswith(".json"): + output_path += ".json" + current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") validation_result = { "id": validation_id, @@ -122,8 +125,9 @@ def fetch_storage_uri(): output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri'] output_data_path = output_data['path'] + if not output_data_path.endswith(".json"): + output_data_path += ".json" output_data_uri = replace_name_in_path(output_data_path, run.id) - # Extract datastore name and path from the AzureML URI datastore_name, path = extract_datastore_info(output_data_uri) From 82262e90cd012c2827c49a8ca50d10ea6eee573a Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Tue, 15 Apr 2025 03:24:17 +0000 Subject: [PATCH 17/29] Download validation result with json file extension --- .../run_inference_validation/spec.yaml | 2 +- .../src/run_inference_validation.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index 3c599b548a..bbc259828f 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -48,7 +48,7 @@ inputs: outputs: validation_result: - type: uri_file + type: uri_folder description: JSON file containing the validation results. metrics_storage_uri: type: uri_file diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 0deeb40504..780e592b11 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -88,11 +88,11 @@ def compare_structures(expected_response, actual_response): } -def save_validation_result(request_details, output_path, validation_id, sku, status): +def save_validation_result(request_details, output_dir, validation_id, sku, status): """Save validation results to a JSON file.""" try: - if not output_path.endswith(".json"): - output_path += ".json" + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, "validation_result.json") current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") validation_result = { @@ -125,18 +125,19 @@ def fetch_storage_uri(): output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri'] output_data_path = output_data['path'] - if not output_data_path.endswith(".json"): - output_data_path += ".json" output_data_uri = replace_name_in_path(output_data_path, run.id) # Extract datastore name and path from the AzureML URI datastore_name, path = extract_datastore_info(output_data_uri) # Construct the storage URI storage_uri = get_storage_url(datastore_name) - full_storage_uri = f"{storage_uri}/{path}" - logger.info(f"Full storage URI: {full_storage_uri}") + folder_uri = f"{storage_uri}/{path}" + # Construct the full path to the validation_result.json file + full_file_uri = f"{folder_uri}/validation_result.json" - return full_storage_uri + logger.info(f"Full storage URI (file): {full_file_uri}") + + return full_file_uri # This is the full path to validation_result.json except Exception as e: logger.error(f"Error fetching storage URI: {e}") return None From e57a8b1a45d6c41aed52a5dd86deb1833982870b Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Wed, 16 Apr 2025 06:07:47 +0000 Subject: [PATCH 18/29] Updated the instance_type sku list --- assets/common/components/deploy_model/spec.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index f97bb2f870..066afde4ac 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -111,6 +111,9 @@ inputs: - Standard_NC24s_v2 - Standard_NC24s_v3 - Standard_NC24rs_v3 + - Standard_NC24ads_A100_v4 + - Standard_NC48ads_A100_v4 + - Standard_NC96ads_A100_v4 - Standard_NC64as_T4_v3 - Standard_ND40rs_v2 - Standard_ND96asr_v4 From 16860cd4498ef53a210639ef38a19a8e33f5a985 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Tue, 22 Apr 2025 14:16:33 +0000 Subject: [PATCH 19/29] Maap self serve save validation result fix --- assets/common/src/deploy.py | 2 +- .../run_inference_validation/spec.yaml | 4 ++-- .../validate_model_inference/spec.yaml | 16 +++++----------- .../src/run_inference_validation.py | 13 +++++++++---- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index a4896c4dd7..5960f8c1da 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -191,7 +191,7 @@ def parse_args(): def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args): """Create endpoint and deployment and return details.""" - endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="key") + endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token") # deployment deployment = ManagedOnlineDeployment( diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index bbc259828f..0af7fb5918 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -18,7 +18,7 @@ command: >- --inference_response ${{inputs.inference_response}} --validation-id ${{inputs.validation_id}} --sku ${{inputs.sku}} - --validation_result ${{outputs.validation_result}} + --validation_results ${{outputs.validation_results}} --metrics_storage_uri ${{outputs.metrics_storage_uri}} inputs: @@ -47,7 +47,7 @@ inputs: description: ID of the validation run (used for updating status in self-serve) outputs: - validation_result: + validation_results: type: uri_folder description: JSON file containing the validation results. metrics_storage_uri: diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index 20234e5de5..3fcc788a81 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -65,7 +65,7 @@ inputs: model_id: type: string - optional: true + optional: false description: | Asset ID of the model registered in workspace/registry. Registry - azureml://registries//models//versions/ @@ -100,7 +100,7 @@ inputs: inference_payload: type: string - optional: true + optional: false description: JSON payload which would be used to validate deployment endpoint_name: @@ -126,9 +126,9 @@ inputs: # Pipeline outputs outputs: - validation_result: + validation_results: description: Output file containing the validation results. - type: uri_file + type: uri_folder jobs: online_deployment_model: @@ -142,8 +142,6 @@ jobs: deployment_name: ${{parent.inputs.deployment_name}} instance_type: ${{parent.inputs.instance_type}} instance_count: ${{parent.inputs.instance_count}} - identity: - type: user_identity outputs: model_deployment_details: type: uri_file @@ -153,8 +151,6 @@ jobs: run_inference_validation: type: command component: azureml:run_inference_validation:0.0.1 - identity: - type: user_identity inputs: validation_id: ${{parent.inputs.validation_id}} sku: ${{parent.inputs.instance_type}} @@ -162,7 +158,7 @@ jobs: expected_response: ${{parent.inputs.inference_response}} inference_response: ${{parent.jobs.online_deployment_model.outputs.model_inference_response}} outputs: - validation_result: ${{parent.outputs.validation_result}} + validation_results: ${{parent.outputs.validation_results}} delete_endpoints: type: command @@ -170,8 +166,6 @@ jobs: inputs: model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} endpoint_name: ${{parent.inputs.endpoint_name}} - identity: - type: user_identity compute: ${{parent.inputs.compute}} publish_results: diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 780e592b11..b5012d2e15 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -91,8 +91,12 @@ def compare_structures(expected_response, actual_response): def save_validation_result(request_details, output_dir, validation_id, sku, status): """Save validation results to a JSON file.""" try: + logger.info(f"Saving validation result to {output_dir}") + # Create the output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) + logger.info(f"Output directory: {output_dir}") output_path = os.path.join(output_dir, "validation_result.json") + logger.info(f"Output path: {output_path}") current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") validation_result = { @@ -110,6 +114,7 @@ def save_validation_result(request_details, output_dir, validation_id, sku, stat logger.info(f"Validation result saved to {output_path}") except Exception as e: logger.error(f"Error saving validation result: {e}") + raise Exception(f"Failed to get MSI credentials : {e}") def replace_name_in_path(path_template, name_value): @@ -122,7 +127,7 @@ def fetch_storage_uri(): try: run = Run.get_context() run_details = run.get_details() - output_data = run_details['runDefinition']['outputData']['validation_result']['outputLocation']['uri'] + output_data = run_details['runDefinition']['outputData']['validation_results']['outputLocation']['uri'] output_data_path = output_data['path'] output_data_uri = replace_name_in_path(output_data_path, run.id) @@ -228,7 +233,7 @@ def main(): help="Path to the expected inference response JSON file.") parser.add_argument("--inference_response", type=str, required=True, help="Path to the actual inference response JSON file.") - parser.add_argument("--validation_result", type=str, required=True, + parser.add_argument("--validation_results", type=str, required=True, help="Path to save validation results.") parser.add_argument("--metrics_storage_uri", type=str, required=True, help="Path to store the metrics.") @@ -297,8 +302,8 @@ def main(): request_details["structuralDiff"] = comparison_result.get("structural_difference", []) # Save the validation result. - save_validation_result(request_details, args.validation_result, args.validation_id, args.sku, status) - logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_result}") + save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, status) + logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_results}") store_metrics_paths(args.metrics_storage_uri) From 13861db17b03c65b4ef40e948dd7d178e5898f8e Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Tue, 22 Apr 2025 15:10:40 +0000 Subject: [PATCH 20/29] Maap self serve save validation result fix --- .../components/validate_model_inference/spec.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index 3fcc788a81..3ffb775a32 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -50,6 +50,9 @@ inputs: - Standard_NC24s_v2 - Standard_NC24s_v3 - Standard_NC24rs_v3 + - Standard_NC24ads_A100_v4 + - Standard_NC48ads_A100_v4 + - Standard_NC96ads_A100_v4 - Standard_NC64as_T4_v3 - Standard_ND40rs_v2 - Standard_ND96asr_v4 From d41d268ef090b6aefb2a070a24b7a13ea0323ef7 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Wed, 7 May 2025 12:30:36 +0000 Subject: [PATCH 21/29] Publish validation result for both success and failure case --- .../spec.yaml | 4 +-- .../run_inference_validation/spec.yaml | 3 +- .../publish_validation_results_selfserve.py | 34 ++++++++----------- .../src/run_inference_validation.py | 28 ++++++++------- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml index 531eb689a0..6eba5e8153 100644 --- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml +++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml @@ -39,7 +39,7 @@ inputs: description: ID of the validation run (used for updating status in self-serve) metrics_storage_uri: type: uri_file - optional: false + optional: true mode: ro_mount description: Path to the file containing the validation metrics csv storage path @@ -53,4 +53,4 @@ command: >- --publisher-name ${{inputs.publisher_name}} --validation-id ${{inputs.validation_id}} --sku ${{inputs.sku}} - --metrics-storage-uri ${{inputs.metrics_storage_uri}} \ No newline at end of file + $[[ --metrics-storage-uri ${{inputs.metrics_storage_uri}}]] \ No newline at end of file diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index 0af7fb5918..9b05530d46 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -15,7 +15,7 @@ command: >- python run_inference_validation.py --inference_payload ${{inputs.inference_payload}} $[[--expected_response ${{inputs.expected_response}}]] - --inference_response ${{inputs.inference_response}} + $[[--inference_response ${{inputs.inference_response}}]] --validation-id ${{inputs.validation_id}} --sku ${{inputs.sku}} --validation_results ${{outputs.validation_results}} @@ -33,6 +33,7 @@ inputs: inference_response: type: uri_file + optional: true description: JSON file containing the actual inference response from the deployed model. sku: diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index c327b48c1c..727efdbf44 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -67,39 +67,33 @@ def update_model_onboarding_version( ): """Update model onboarding version with benchmark results.""" current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - - metrics_path_dict = read_results_from_file(metrics_storage_uri) + if not metrics_storage_uri: + validation_success = False + metrics_url = None + else: + metrics_path_dict = read_results_from_file(metrics_storage_uri) + metrics_url = metrics_path_dict.get( + "api_inference_path") if metrics_path_dict else None + validation_success = metrics_url is not None validation_result = [] + logger.info(f"validation_success: {validation_success}, metrics_url: {metrics_url}, metrics_storage_uri: {metrics_storage_uri}") if validation_id: - if metrics_path_dict.get("api_inference_path", None): - validation_result.append({ - "Id": validation_id, - "type": "API_VALIDATION", - "passed": True, - "message": "API inference passed successfully", - "validationResultUrl": metrics_path_dict.get("api_inference_path"), - "status": "Completed", - "createdTime": current_time, - "updatedTime": current_time, - "sku": sku - }) - else: - validation_result.append({ + validation_result.append({ "Id": validation_id, "type": "API_VALIDATION", "passed": True, "message": "API inference passed successfully", - "validationResultUrl": metrics_path_dict.get("api_inference_path"), - "status": "Failed", + "validationResultUrl": metrics_url, + "status": "Completed" if validation_success else "Failed", "createdTime": current_time, "updatedTime": current_time, "sku": sku }) else: logger.error( - "Validation ID is None, not updating validation results in self-serve") + "Validation ID is None, not updating validation results in self-serve") sys.exit(1) payload = { @@ -157,7 +151,7 @@ def update_model_onboarding_version( help="Base URL of the model publisher self-serve API") parser.add_argument("--validation-id", required=True, help="Run ID of the validation run") - parser.add_argument("--metrics-storage-uri", required=True, + parser.add_argument("--metrics-storage-uri", required=False, help="URI to the storage where validation metrics are stored") parser.add_argument("--sku", required=False, default="Standard_NC24ads_A100_v4", diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index b5012d2e15..9cbfccc33b 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -231,7 +231,7 @@ def main(): help="Serialized JSON payload for inference") parser.add_argument("--expected_response", type=str, required=False, help="Path to the expected inference response JSON file.") - parser.add_argument("--inference_response", type=str, required=True, + parser.add_argument("--inference_response", type=str, required=False, help="Path to the actual inference response JSON file.") parser.add_argument("--validation_results", type=str, required=True, help="Path to save validation results.") @@ -264,24 +264,26 @@ def main(): logger.info(f"Decoded string: {decoded_str}") expected_response = json.loads(decoded_str) + inference_output = None + if args.inference_response: + inference_output = load_json(args.inference_response) + if not inference_output: + logger.error("Inference response is missing or invalid.") - inference_output = load_json(args.inference_response) - if not inference_output: - logger.error("Inference output is missing or invalid.") - sys.exit(1) - - inference_response = inference_output.get("response") - if isinstance(inference_response, str): - try: - inference_response = json.loads(inference_response) - except json.JSONDecodeError as e: - logger.warning(f"Failed to parse actualResponse as JSON: {e}") + inference_response = None + if inference_output: + inference_response = inference_output.get("response") + if isinstance(inference_response, str): + try: + inference_response = json.loads(inference_response) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse actualResponse as JSON: {e}") if inference_response is None: logger.warning("Actual response is missing or invalid. Setting it to an empty structure.") inference_response = {} - inference_time = inference_output.get("inference_time", 0) + inference_time = inference_output.get("inference_time", 0) if inference_output else 0 logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, " f"actual response: {inference_response}") From 298b076636f66d66395401a818b0ce5a12513be0 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Wed, 7 May 2025 15:41:41 +0000 Subject: [PATCH 22/29] Publish validation result for both success and failure case --- .../src/run_inference_validation.py | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 9cbfccc33b..de83072f07 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -46,21 +46,21 @@ def load_json_from_string(json_string): return None -def get_json_structure(data): +def get_json_structure(data, parent_key=''): """ - Recursively extract the structure of JSON (keys only). - - For dictionaries, returns a dict of keys mapped to their structure. - For lists, returns a list with the structure of the first element. - For other types, returns None. + Recursively extract key paths from nested JSON. """ + keys = set() if isinstance(data, dict): - return {key: get_json_structure(value) for key, value in data.items()} - elif isinstance(data, list) and len(data) > 0: - # Assume all elements share the same structure and return the structure of the first element. - return [get_json_structure(data[0])] - else: - return None + for k, v in data.items(): + full_key = f"{parent_key}.{k}" if parent_key else k + keys.add(full_key) + keys.update(get_json_structure(v, full_key)) + elif isinstance(data, list): + for index, item in enumerate(data): + full_key = f"{parent_key}[{index}]" + keys.update(get_json_structure(item, full_key)) + return keys def compare_structures(expected_response, actual_response): @@ -71,17 +71,20 @@ def compare_structures(expected_response, actual_response): """ expected_structure = get_json_structure(expected_response) actual_structure = get_json_structure(actual_response) - logger.info(f"expected_structure: {expected_structure} \n actual_structure: {actual_structure}") + logger.info(f"Expected structure: {expected_structure}") + logger.info(f"Actual structure: {actual_structure}") - structure_match = expected_structure == actual_structure if expected_response else None - structural_difference = [] + added_keys = actual_structure - expected_structure + removed_keys = expected_structure - actual_structure + structure_match = not added_keys and not removed_keys - if not structure_match: - structural_difference = [ - {"expected": expected_structure, "actual": actual_structure} - ] + structural_difference = { + "added_keys": sorted(list(added_keys)), + "removed_keys": sorted(list(removed_keys)), + } logger.info(f"Structure match: {structure_match}, Structural differences: {structural_difference}") + return { "structure_match": structure_match, "structural_difference": structural_difference From 47e3422c42a035353e33df02c658d633794a0e1b Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Sat, 10 May 2025 18:46:25 +0000 Subject: [PATCH 23/29] Maap Self serve validation AML pipeline error message update --- .../common/components/deploy_model/spec.yaml | 6 +- assets/common/src/deploy.py | 239 ++++++++------- .../spec.yaml | 13 +- .../run_inference_validation/spec.yaml | 12 +- .../validate_model_inference/spec.yaml | 16 +- .../publish_validation_results_selfserve.py | 28 +- .../src/run_inference_validation.py | 285 ++++++++++++------ 7 files changed, 394 insertions(+), 205 deletions(-) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index 066afde4ac..03c2370004 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.12 +version: 0.0.12.27 type: command is_deterministic: True @@ -37,6 +37,7 @@ command: >- $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]] --model_deployment_details ${{outputs.model_deployment_details}} --model_inference_response ${{outputs.model_inference_response}} + --deploy_error ${{outputs.deploy_error}} inputs: # Output of registering component @@ -215,6 +216,9 @@ outputs: model_inference_response: type: uri_file description: JSON file containing inference results + deploy_error: + type: uri_file + description: File containing error messages or stack traces from the validation step. tags: Preview: "" diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 5960f8c1da..646373cb3d 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -7,6 +7,7 @@ import re import time import base64 +import traceback from azure.ai.ml.entities import ( ManagedOnlineEndpoint, @@ -173,6 +174,11 @@ def parse_args(): type=str, help="Path to the inference response JSON file.", ) + parser.add_argument( + "--deploy_error", + type=str, + help="Path to the inference response JSON file.", + ) # parse args args = parser.parse_args() logger.info(f"Args received {args}") @@ -269,116 +275,145 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen def main(): """Run main function.""" args = parse_args() + logger.info(f"Arguments: {args}") ml_client = get_mlclient() + error_message = "None" # get registered model id - if args.model_id: - model_id = str(args.model_id) - elif args.registration_details_folder: - registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE - if registration_details_file.exists(): - try: - with open(registration_details_file) as f: - model_info = json.load(f) - model_id = model_info["id"] - except Exception as e: - raise Exception(f"model_registration_details json file is missing model information {e}.") + try: + if args.model_deployment_details: + with open(args.model_deployment_details, "w") as outfile: + json.dump({}, outfile) + + if args.model_inference_response: + with open(args.model_inference_response, "w") as f: + json.dump({}, f, indent=4) + + if args.deploy_error: + with open(args.deploy_error, "w") as error_file: + error_file.write(error_message) + + if args.model_id: + model_id = str(args.model_id) + elif args.registration_details_folder: + registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE + if registration_details_file.exists(): + try: + with open(registration_details_file) as f: + model_info = json.load(f) + model_id = model_info["id"] + except Exception as e: + raise Exception(f"model_registration_details json file is missing model information {e}.") + else: + raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.") else: - raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.") - else: - raise Exception("Arguments model_id and registration_details both are missing.") - - # Endpoint has following restrictions: - # 1. Name must begin with lowercase letter - # 2. Followed by lowercase letters, hyphen or numbers - # 3. End with a lowercase letter or number - - # 1. Replace underscores and slashes by hyphens and convert them to lower case. - # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it - model_name = get_model_name(model_id) - - endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21] - endpoint_name = f"{endpoint_name}-{int(time.time())}" - endpoint_name = endpoint_name - - endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name - deployment_name = args.deployment_name if args.deployment_name else "default" - - endpoint, deployment = create_endpoint_and_deployment( - ml_client=ml_client, - endpoint_name=endpoint_name, - deployment_name=deployment_name, - model_id=model_id, - args=args - ) - - response = None - if args.inference_payload or args.inference_payload_str: - print("Invoking inference with test payload ...") - try: - start_time = time.time() - if args.inference_payload_str: - print(f"Inference payload string: {args.inference_payload_str}") - decoded_bytes = base64.b64decode(args.inference_payload_str) - - # Convert bytes to string - decoded_str = decoded_bytes.decode('utf-8') - logger.info(f"Decoded string: {decoded_str}") - - payload = json.loads(decoded_str) - logger.info(f"Payload:\n {payload}") - - with open("payload.json", "w") as temp_file: - json.dump(payload, temp_file) + raise Exception("Arguments model_id and registration_details both are missing.") + + # Endpoint has following restrictions: + # 1. Name must begin with lowercase letter + # 2. Followed by lowercase letters, hyphen or numbers + # 3. End with a lowercase letter or number + + # 1. Replace underscores and slashes by hyphens and convert them to lower case. + # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it + model_name = get_model_name(model_id) + + endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21] + endpoint_name = f"{endpoint_name}-{int(time.time())}" + endpoint_name = endpoint_name + + endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name + deployment_name = args.deployment_name if args.deployment_name else "default" + + endpoint, deployment = create_endpoint_and_deployment( + ml_client=ml_client, + endpoint_name=endpoint_name, + deployment_name=deployment_name, + model_id=model_id, + args=args + ) - response = ml_client.online_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - request_file="payload.json", - ) - elif args.inference_payload: - response = ml_client.online_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - request_file=args.inference_payload, + response = None + if args.inference_payload or args.inference_payload_str: + print("Invoking inference with test payload ...") + try: + start_time = time.time() + if args.inference_payload_str: + print(f"Inference payload string: {args.inference_payload_str}") + decoded_bytes = base64.b64decode(args.inference_payload_str) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + + payload = json.loads(decoded_str) + logger.info(f"Payload:\n {payload}") + + with open("payload.json", "w") as temp_file: + json.dump(payload, temp_file) + + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file="payload.json", + ) + elif args.inference_payload: + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file=args.inference_payload, + ) + + end_time = time.time() + inference_time_ms = int((end_time - start_time) * 1000) + + logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " + + f"and response: {response}") + # Save inference response + if args.model_inference_response: + inference_result = { + "response": response, + "inference_time": inference_time_ms + } + with open(args.model_inference_response, "w") as f: + json.dump(inference_result, f, indent=4) + logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}") + except Exception as e: + raise AzureMLException._with_error( + AzureMLError.create(OnlineEndpointInvocationError, exception=e) ) - end_time = time.time() - inference_time_ms = int((end_time - start_time) * 1000) - - logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " + - f"and response: {response}") - # Save inference response - if args.model_inference_response: - inference_result = { - "response": response, - "inference_time": inference_time_ms - } - with open(args.model_inference_response, "w") as f: - json.dump(inference_result, f, indent=4) - logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}") - except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(OnlineEndpointInvocationError, exception=e) - ) + print("Saving deployment details ...") + + # write deployment details to file + endpoint_type = "aml_online_inference" + deployment_details = { + "endpoint_name": endpoint.name, + "deployment_name": deployment.name, + "endpoint_uri": endpoint.__dict__["_scoring_uri"], + "endpoint_type": endpoint_type, + "instance_type": args.instance_type, + "instance_count": args.instance_count, + "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance, + } + json_object = json.dumps(deployment_details, indent=4) + with open(args.model_deployment_details, "w") as outfile: + outfile.write(json_object) + logger.info("Saved deployment details in output json file.") - print("Saving deployment details ...") - - # write deployment details to file - endpoint_type = "aml_online_inference" - deployment_details = { - "endpoint_name": endpoint.name, - "deployment_name": deployment.name, - "endpoint_uri": endpoint.__dict__["_scoring_uri"], - "endpoint_type": endpoint_type, - "instance_type": args.instance_type, - "instance_count": args.instance_count, - "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance, - } - json_object = json.dumps(deployment_details, indent=4) - with open(args.model_deployment_details, "w") as outfile: - outfile.write(json_object) - logger.info("Saved deployment details in output json file.") + except Exception as e: + # Capture the full traceback + stack_trace = traceback.format_exc() + error_message = f"Model deployment failed.\n{stack_trace}" + logger.error(f"error_message from stack trace: {error_message}, deploy_error_path: {args.deploy_error}") + + # Write the error message to the specified error output file + if args.deploy_error: + with open(args.deploy_error, "w") as error_file: + error_file.write(error_message) + + # Re-raise the exception with the full traceback + # raise Exception(error_message) if __name__ == "__main__": diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml index 6eba5e8153..1c9a80bd95 100644 --- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml +++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml @@ -3,7 +3,7 @@ type: command is_deterministic: true name: publish_validation_results_selfserve -version: 0.0.1 +version: 0.0.1.22 display_name: Publish model validation results to Self-Serve description: | This component publishes model validation results to the Self-Serve database. @@ -42,6 +42,14 @@ inputs: optional: true mode: ro_mount description: Path to the file containing the validation metrics csv storage path + # deploy_error: + # type: uri_file + # optional: true + # description: Error message or stack trace from the deployment step + validation_error: + type: uri_file + optional: true + description: Error message or stack trace from the inference validation step code: ../../src @@ -53,4 +61,5 @@ command: >- --publisher-name ${{inputs.publisher_name}} --validation-id ${{inputs.validation_id}} --sku ${{inputs.sku}} - $[[ --metrics-storage-uri ${{inputs.metrics_storage_uri}}]] \ No newline at end of file + $[[ --metrics-storage-uri ${{inputs.metrics_storage_uri}}]] + $[[ --validation-error ${{inputs.validation_error}}]] \ No newline at end of file diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index 9b05530d46..bf8a25e1b0 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: run_inference_validation -version: 0.0.1 +version: 0.0.1.65 type: command is_deterministic: True @@ -16,10 +16,12 @@ command: >- --inference_payload ${{inputs.inference_payload}} $[[--expected_response ${{inputs.expected_response}}]] $[[--inference_response ${{inputs.inference_response}}]] + $[[--deployment_error ${{inputs.deployment_error}}]] --validation-id ${{inputs.validation_id}} --sku ${{inputs.sku}} --validation_results ${{outputs.validation_results}} --metrics_storage_uri ${{outputs.metrics_storage_uri}} + --validation_error ${{outputs.validation_error}} inputs: inference_payload: @@ -46,6 +48,11 @@ inputs: type: string optional: false description: ID of the validation run (used for updating status in self-serve) + + deployment_error: + type: uri_file + optional: true + description: Error message or stack trace from the inference validation step outputs: validation_results: @@ -54,6 +61,9 @@ outputs: metrics_storage_uri: type: uri_file description: JSON file containing the validation metrics csv storage path + validation_error: + type: uri_file + description: File containing error messages or stack traces from the validation step. tags: Preview: "" diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index 3ffb775a32..a9c14275a8 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -4,7 +4,7 @@ type: pipeline name: validate_model_inference display_name: Validate Model Inference description: deploy a model and validate it using a sample payload -version: 0.0.1 +version: 0.0.1.74 inputs: compute: @@ -136,7 +136,7 @@ outputs: jobs: online_deployment_model: type: command - component: azureml:deploy_model:0.0.12 + component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.27 compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} @@ -150,22 +150,27 @@ jobs: type: uri_file model_inference_response: type: uri_file + deploy_error: + type: uri_file run_inference_validation: type: command - component: azureml:run_inference_validation:0.0.1 + component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.65 inputs: validation_id: ${{parent.inputs.validation_id}} sku: ${{parent.inputs.instance_type}} inference_payload: ${{parent.inputs.inference_payload}} expected_response: ${{parent.inputs.inference_response}} inference_response: ${{parent.jobs.online_deployment_model.outputs.model_inference_response}} + deployment_error: ${{parent.jobs.online_deployment_model.outputs.deploy_error}} outputs: validation_results: ${{parent.outputs.validation_results}} + validation_error: + type: uri_file delete_endpoints: type: command - component: azureml:delete_endpoint:0.0.7 + component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1 inputs: model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} endpoint_name: ${{parent.inputs.endpoint_name}} @@ -173,7 +178,7 @@ jobs: publish_results: type: command - component: azureml:publish_validation_results_selfserve:0.0.1 + component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.22 inputs: publisher_name: ${{parent.inputs.publisher_name}} model_name: ${{parent.inputs.model_name}} @@ -182,3 +187,4 @@ jobs: validation_id: ${{parent.inputs.validation_id}} selfserve_base_url: ${{parent.inputs.selfserve_base_url}} metrics_storage_uri: ${{parent.jobs.run_inference_validation.outputs.metrics_storage_uri}} + validation_error: ${{parent.jobs.run_inference_validation.outputs.validation_error}} diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index 727efdbf44..11779360d0 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -63,7 +63,8 @@ def update_model_onboarding_version( sku, validation_id, selfserve_base_url, - metrics_storage_uri + metrics_storage_uri, + error_message ): """Update model onboarding version with benchmark results.""" current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") @@ -86,6 +87,7 @@ def update_model_onboarding_version( "passed": True, "message": "API inference passed successfully", "validationResultUrl": metrics_url, + "errorMessage": error_message if error_message else None, "status": "Completed" if validation_success else "Failed", "createdTime": current_time, "updatedTime": current_time, @@ -156,10 +158,31 @@ def update_model_onboarding_version( parser.add_argument("--sku", required=False, default="Standard_NC24ads_A100_v4", help="Suggested SKU based on benchmark results") + # parser.add_argument("--deploy-error", required=False, + # help="Path to the file containing deployment error messages or stack traces") + parser.add_argument("--validation-error", required=False, + help="Path to the file containing validation error messages or stack traces") args = parser.parse_args() logger.info(f"Arguments: {args}") + error_message = "" + # if args.deploy_error: + # try: + # with open(args.deploy_error, "r") as f: + # deploy_error_message = f.read().strip() + # error_message += f"Deployment Error: {deploy_error_message}\n" + # except Exception as e: + # logger.warning(f"Failed to read deploy_error file: {e}") + + if args.validation_error: + try: + with open(args.validation_error, "r") as f: + validation_error_message = f.read().strip() + error_message += f"Validation Error: {validation_error_message}\n" + except Exception as e: + logger.warning(f"Failed to read validation_error file: {e}") + try: result = update_model_onboarding_version( args.publisher_name, @@ -168,7 +191,8 @@ def update_model_onboarding_version( args.sku, args.validation_id, args.selfserve_base_url, - args.metrics_storage_uri + args.metrics_storage_uri, + error_message ) logger.info("Model onboarding version update completed successfully") except Exception as e: diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index de83072f07..71c47ff5ed 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -8,6 +8,8 @@ import argparse import os import sys +import traceback +import re from datetime import datetime, timezone from azureml.core import Run from azureml.model.mgmt.utils.common_utils import get_mlclient @@ -46,50 +48,104 @@ def load_json_from_string(json_string): return None -def get_json_structure(data, parent_key=''): +def set_nested_value(d, keys, value): """ - Recursively extract key paths from nested JSON. + Helper to set a value into a nested dictionary/list from a list of keys/indexes. """ - keys = set() + for i, key in enumerate(keys): + is_last = i == len(keys) - 1 + if isinstance(key, int): + while len(d) <= key: + d.append({} if not is_last else None) + if is_last: + d[key] = value + else: + if not isinstance(d[key], (dict, list)): + d[key] = {} + d = d[key] + else: + if key not in d or not isinstance(d[key], (dict, list)): + d[key] = {} if not is_last else None + if is_last: + d[key] = value + else: + d = d[key] + +def parse_key_path(key): + """ + Converts a key string like '[0].a.b[1]' to a list of keys: [0, 'a', 'b', 1] + """ + parts = re.findall(r'\[(\d+)\]|([^.]+)', key) + return [int(i) if i else j for i, j in parts] + +def build_nested_json(flat_dict): + """ + Converts a flat key-path dictionary to nested JSON. + """ + result = {} if flat_dict else None + for key_path, value in flat_dict.items(): + keys = parse_key_path(key_path) + if isinstance(keys[0], int): + if not isinstance(result, list): + result = [] + set_nested_value(result, keys, value) + return result + +def get_json_structure_with_values(data, parent_key=''): + """ + Recursively extract key paths and their values from nested JSON. + Returns a dictionary of full_key_path: value + """ + items = {} if isinstance(data, dict): for k, v in data.items(): full_key = f"{parent_key}.{k}" if parent_key else k - keys.add(full_key) - keys.update(get_json_structure(v, full_key)) + if isinstance(v, (dict, list)): + items.update(get_json_structure_with_values(v, full_key)) + else: + items[full_key] = v elif isinstance(data, list): for index, item in enumerate(data): - full_key = f"{parent_key}[{index}]" - keys.update(get_json_structure(item, full_key)) - return keys - + full_key = f"{parent_key}[{index}]" if parent_key else f"[{index}]" + if isinstance(item, (dict, list)): + items.update(get_json_structure_with_values(item, full_key)) + else: + items[full_key] = item + return items def compare_structures(expected_response, actual_response): """ - Compare JSON structures (keys only) of expected and actual. - - Returns a dictionary with structural differences and a match flag. + Compare JSON structures and return full nested added/removed diffs. """ - expected_structure = get_json_structure(expected_response) - actual_structure = get_json_structure(actual_response) - logger.info(f"Expected structure: {expected_structure}") - logger.info(f"Actual structure: {actual_structure}") - - added_keys = actual_structure - expected_structure - removed_keys = expected_structure - actual_structure - structure_match = not added_keys and not removed_keys - - structural_difference = { - "added_keys": sorted(list(added_keys)), - "removed_keys": sorted(list(removed_keys)), - } + expected_structure = get_json_structure_with_values(expected_response) + actual_structure = get_json_structure_with_values(actual_response) - logger.info(f"Structure match: {structure_match}, Structural differences: {structural_difference}") + logger.info(f"Expected flat structure: {expected_structure}") + logger.info(f"Actual flat structure: {actual_structure}") - return { + added_keys = actual_structure.keys() - expected_structure.keys() + removed_keys = expected_structure.keys() - actual_structure.keys() + + added_flat = {key: actual_structure[key] for key in added_keys} + removed_flat = {key: expected_structure[key] for key in removed_keys} + + added_nested = build_nested_json(added_flat) + removed_nested = build_nested_json(removed_flat) + + structure_match = not added_flat and not removed_flat + + result = { "structure_match": structure_match, - "structural_difference": structural_difference + "structural_difference": { + "added": added_nested, + "removed": removed_nested + } } + logger.info("Comparison result:") + logger.info(json.dumps(result, indent=4)) + + return result def save_validation_result(request_details, output_dir, validation_id, sku, status): """Save validation results to a JSON file.""" @@ -227,6 +283,109 @@ def extract_datastore_info(datastore_uri_path): return None, None +def run_inference_validation(args): + """Perform the inference validation logic.""" + try: + error_message = "" + if args.deployment_error: + try: + with open(args.deployment_error, "r") as f: + deployment_error = f.read().strip() + error_message += deployment_error + except Exception as e: + logger.warning(f"Failed to read validation_error file: {e}") + + if args.validation_error: + with open(args.validation_error, "w") as error_file: + error_file.write(error_message) + inference_payload = None + if args.inference_payload: + decoded_bytes = base64.b64decode(args.inference_payload) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + + inference_payload = json.loads(decoded_str) + + expected_response = None + if args.expected_response: + decoded_bytes = base64.b64decode(args.expected_response) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + expected_response = json.loads(decoded_str) + + inference_output = None + if args.inference_response: + inference_output = load_json(args.inference_response) + if not inference_output: + logger.error("Inference response is missing or invalid.") + + inference_response = None + if inference_output: + inference_response = inference_output.get("response") + if isinstance(inference_response, str): + try: + inference_response = json.loads(inference_response) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse actualResponse as JSON: {e}") + + if inference_response is None: + logger.warning("Actual response is missing or invalid. Setting it to an empty structure.") + inference_response = {} + + inference_time = inference_output.get("inference_time", 0) if inference_output else 0 + logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, " + f"actual response: {inference_response}") + + # Infer success status based on the presence of a valid response + success_status = inference_response is not None and bool(inference_response) + status = "Success" if success_status else "Failed" + + request_details = { + "providedRequest": inference_payload, + "providedResponse": expected_response, + "actualResponse": inference_response, + "responseTimeMs": inference_time, + "errorMessage": error_message, + "structuralDiff": None, + } + logger.info(f"Request details: {request_details}") + if expected_response and inference_response: + comparison_result = compare_structures(expected_response, inference_response) + request_details["structuralDiff"] = comparison_result.get("structural_difference", []) + + # Save the validation result. + save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, status) + logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_results}") + + store_metrics_paths(args.metrics_storage_uri) + except Exception as e: + stack_trace = traceback.format_exc() + error_message = f"Model validation failed.\n{stack_trace}" + logger.error(error_message) + # Save the error message in the request details + request_details = { + "providedRequest": None, + "providedResponse": None, + "actualResponse": None, + "responseTimeMs": 0, + "errorMessage": error_message, + "structuralDiff": None, + } + + # Save the validation result with the error message + save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, "Failed") + + # Write the error message to the specified error output file + if args.validation_error: + with open(args.validation_error, "w") as error_file: + error_file.write(error_message) + # raise Exception(f"Failed to run inference validation: {error_message}") + + def main(): """Compare expected and actual inference response structures.""" parser = argparse.ArgumentParser() @@ -236,6 +395,8 @@ def main(): help="Path to the expected inference response JSON file.") parser.add_argument("--inference_response", type=str, required=False, help="Path to the actual inference response JSON file.") + parser.add_argument("--deployment_error", type=str, required=False, + help="Path to the deployment_error.") parser.add_argument("--validation_results", type=str, required=True, help="Path to save validation results.") parser.add_argument("--metrics_storage_uri", type=str, required=True, @@ -245,72 +406,12 @@ def main(): help="Suggested SKU based on benchmark results") parser.add_argument("--validation-id", required=True, help="Run ID of the validation run") + parser.add_argument("--validation_error", type=str, required=False, + help="Path to the file where error messages or stack traces will be written.") args = parser.parse_args() - - inference_payload = None - if args.inference_payload: - decoded_bytes = base64.b64decode(args.inference_payload) - - # Convert bytes to string - decoded_str = decoded_bytes.decode('utf-8') - logger.info(f"Decoded string: {decoded_str}") - - inference_payload = json.loads(decoded_str) - - expected_response = None - if args.expected_response: - decoded_bytes = base64.b64decode(args.expected_response) - - # Convert bytes to string - decoded_str = decoded_bytes.decode('utf-8') - logger.info(f"Decoded string: {decoded_str}") - expected_response = json.loads(decoded_str) - - inference_output = None - if args.inference_response: - inference_output = load_json(args.inference_response) - if not inference_output: - logger.error("Inference response is missing or invalid.") - - inference_response = None - if inference_output: - inference_response = inference_output.get("response") - if isinstance(inference_response, str): - try: - inference_response = json.loads(inference_response) - except json.JSONDecodeError as e: - logger.warning(f"Failed to parse actualResponse as JSON: {e}") - - if inference_response is None: - logger.warning("Actual response is missing or invalid. Setting it to an empty structure.") - inference_response = {} - - inference_time = inference_output.get("inference_time", 0) if inference_output else 0 - logger.info(f"inference_payload: {inference_payload}, expected response: {expected_response}, " - f"actual response: {inference_response}") - - # Infer success status based on the presence of a valid response - success_status = inference_response is not None and bool(inference_response) - status = "Success" if success_status else "Failed" - - request_details = { - "providedRequest": inference_payload, - "providedResponse": expected_response, - "actualResponse": inference_response, - "responseTimeMs": inference_time, - "errorMessage": None, - "structuralDiff": None, - } - if expected_response: - comparison_result = compare_structures(expected_response, inference_response) - request_details["structuralDiff"] = comparison_result.get("structural_difference", []) - - # Save the validation result. - save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, status) - logger.info(f"validation_result: {request_details}, Validation result saved to {args.validation_results}") - - store_metrics_paths(args.metrics_storage_uri) + logger.info(f"Arguments: {args}") + run_inference_validation(args) if __name__ == "__main__": From c1211a9859b8f7da9ab3487f1d444508a807d729 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Sun, 11 May 2025 13:58:58 +0000 Subject: [PATCH 24/29] Maap Self serve validation AML pipeline error message update --- assets/common/components/deploy_model/spec.yaml | 2 +- assets/common/src/deploy.py | 11 +++++------ .../publish_validation_results_selfserve/spec.yaml | 6 +----- .../components/run_inference_validation/spec.yaml | 2 +- .../components/validate_model_inference/spec.yaml | 9 ++++----- .../src/publish_validation_results_selfserve.py | 10 ---------- .../model_management/src/run_inference_validation.py | 10 +++++++--- 7 files changed, 19 insertions(+), 31 deletions(-) diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml index 03c2370004..f4678e2426 100644 --- a/assets/common/components/deploy_model/spec.yaml +++ b/assets/common/components/deploy_model/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: deploy_model -version: 0.0.12.27 +version: 0.0.12.29 type: command is_deterministic: True diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 646373cb3d..204b04623e 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -274,13 +274,11 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen @swallow_all_exceptions(logger) def main(): """Run main function.""" - args = parse_args() - logger.info(f"Arguments: {args}") - ml_client = get_mlclient() - error_message = "None" - # get registered model id - try: + args = parse_args() + logger.info(f"Arguments: {args}") + ml_client = get_mlclient() + error_message = "" if args.model_deployment_details: with open(args.model_deployment_details, "w") as outfile: json.dump({}, outfile) @@ -293,6 +291,7 @@ def main(): with open(args.deploy_error, "w") as error_file: error_file.write(error_message) + # get registered model id if args.model_id: model_id = str(args.model_id) elif args.registration_details_folder: diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml index 1c9a80bd95..498fde2dc1 100644 --- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml +++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml @@ -3,7 +3,7 @@ type: command is_deterministic: true name: publish_validation_results_selfserve -version: 0.0.1.22 +version: 0.0.1.23 display_name: Publish model validation results to Self-Serve description: | This component publishes model validation results to the Self-Serve database. @@ -42,10 +42,6 @@ inputs: optional: true mode: ro_mount description: Path to the file containing the validation metrics csv storage path - # deploy_error: - # type: uri_file - # optional: true - # description: Error message or stack trace from the deployment step validation_error: type: uri_file optional: true diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index bf8a25e1b0..c5e9f5cfb0 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: run_inference_validation -version: 0.0.1.65 +version: 0.0.1.66 type: command is_deterministic: True diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index a9c14275a8..731b6cb4be 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -4,7 +4,7 @@ type: pipeline name: validate_model_inference display_name: Validate Model Inference description: deploy a model and validate it using a sample payload -version: 0.0.1.74 +version: 0.0.1.76 inputs: compute: @@ -136,7 +136,7 @@ outputs: jobs: online_deployment_model: type: command - component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.27 + component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.29 compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} @@ -155,7 +155,7 @@ jobs: run_inference_validation: type: command - component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.65 + component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.66 inputs: validation_id: ${{parent.inputs.validation_id}} sku: ${{parent.inputs.instance_type}} @@ -174,11 +174,10 @@ jobs: inputs: model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} endpoint_name: ${{parent.inputs.endpoint_name}} - compute: ${{parent.inputs.compute}} publish_results: type: command - component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.22 + component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.23 inputs: publisher_name: ${{parent.inputs.publisher_name}} model_name: ${{parent.inputs.model_name}} diff --git a/assets/training/model_management/src/publish_validation_results_selfserve.py b/assets/training/model_management/src/publish_validation_results_selfserve.py index 11779360d0..e6fa644f6e 100644 --- a/assets/training/model_management/src/publish_validation_results_selfserve.py +++ b/assets/training/model_management/src/publish_validation_results_selfserve.py @@ -158,8 +158,6 @@ def update_model_onboarding_version( parser.add_argument("--sku", required=False, default="Standard_NC24ads_A100_v4", help="Suggested SKU based on benchmark results") - # parser.add_argument("--deploy-error", required=False, - # help="Path to the file containing deployment error messages or stack traces") parser.add_argument("--validation-error", required=False, help="Path to the file containing validation error messages or stack traces") @@ -167,14 +165,6 @@ def update_model_onboarding_version( logger.info(f"Arguments: {args}") error_message = "" - # if args.deploy_error: - # try: - # with open(args.deploy_error, "r") as f: - # deploy_error_message = f.read().strip() - # error_message += f"Deployment Error: {deploy_error_message}\n" - # except Exception as e: - # logger.warning(f"Failed to read deploy_error file: {e}") - if args.validation_error: try: with open(args.validation_error, "r") as f: diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 71c47ff5ed..0997b01521 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -283,9 +283,10 @@ def extract_datastore_info(datastore_uri_path): return None, None -def run_inference_validation(args): +def run_inference_validation(): """Perform the inference validation logic.""" try: + args = parse_args() error_message = "" if args.deployment_error: try: @@ -385,8 +386,11 @@ def run_inference_validation(args): error_file.write(error_message) # raise Exception(f"Failed to run inference validation: {error_message}") - def main(): + run_inference_validation() + + +def parse_args(): """Compare expected and actual inference response structures.""" parser = argparse.ArgumentParser() parser.add_argument("--inference_payload", type=str, required=True, @@ -411,7 +415,7 @@ def main(): args = parser.parse_args() logger.info(f"Arguments: {args}") - run_inference_validation(args) + return args if __name__ == "__main__": From 3ed891ba8bad2ef941c30c2f0a56775bf6ac4f91 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Tue, 13 May 2025 09:25:13 +0000 Subject: [PATCH 25/29] Maap Self serve validation AML pipeline error message update --- assets/training/model_management/src/run_inference_validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 0997b01521..39164a0eb4 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -379,6 +379,7 @@ def run_inference_validation(): # Save the validation result with the error message save_validation_result(request_details, args.validation_results, args.validation_id, args.sku, "Failed") + store_metrics_paths(args.metrics_storage_uri) # Write the error message to the specified error output file if args.validation_error: From 6faef71d3395f148ef40039a474b5afaee9d3def Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Mon, 19 May 2025 07:26:30 +0000 Subject: [PATCH 26/29] AML MaaP Self serve validation pipeline refactoring --- .../deploy_inference_model/spec.yaml | 225 ++++++++++ assets/common/src/deploy_inference_model.py | 417 ++++++++++++++++++ .../validate_model_inference/spec.yaml | 10 +- .../src/run_inference_validation.py | 5 +- 4 files changed, 649 insertions(+), 8 deletions(-) create mode 100644 assets/common/components/deploy_inference_model/spec.yaml create mode 100644 assets/common/src/deploy_inference_model.py diff --git a/assets/common/components/deploy_inference_model/spec.yaml b/assets/common/components/deploy_inference_model/spec.yaml new file mode 100644 index 0000000000..efae05fea4 --- /dev/null +++ b/assets/common/components/deploy_inference_model/spec.yaml @@ -0,0 +1,225 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json +name: deploy_inference_model +version: 0.0.1 +type: command + +is_deterministic: True + +display_name: Deploy model +description: + Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. + +environment: azureml://registries/azureml/environments/python-sdk-v2/versions/29 + +code: ../../src +command: >- + python deploy_inference_model.py + $[[--registration_details_folder ${{inputs.registration_details_folder}}]] + $[[--model_id ${{inputs.model_id}}]] + $[[--inference_payload ${{inputs.inference_payload}}]] + $[[--inference_payload_str ${{inputs.inference_payload_str}}]] + $[[--endpoint_name ${{inputs.endpoint_name}}]] + $[[--deployment_name ${{inputs.deployment_name}}]] + $[[--instance_type ${{inputs.instance_type}}]] + $[[--instance_count ${{inputs.instance_count}}]] + $[[--max_concurrent_requests_per_instance ${{inputs.max_concurrent_requests_per_instance}}]] + $[[--request_timeout_ms ${{inputs.request_timeout_ms}}]] + $[[--max_queue_wait_ms ${{inputs.max_queue_wait_ms}}]] + $[[--failure_threshold_readiness_probe ${{inputs.failure_threshold_readiness_probe}}]] + $[[--success_threshold_readiness_probe ${{inputs.success_threshold_readiness_probe}}]] + $[[--timeout_readiness_probe ${{inputs.timeout_readiness_probe}}]] + $[[--period_readiness_probe ${{inputs.period_readiness_probe}}]] + $[[--initial_delay_readiness_probe ${{inputs.initial_delay_readiness_probe}}]] + $[[--failure_threshold_liveness_probe ${{inputs.failure_threshold_liveness_probe}}]] + $[[--timeout_liveness_probe ${{inputs.timeout_liveness_probe}}]] + $[[--period_liveness_probe ${{inputs.period_liveness_probe}}]] + $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]] + $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]] + --model_deployment_details ${{outputs.model_deployment_details}} + --model_inference_response ${{outputs.model_inference_response}} + --deploy_error ${{outputs.deploy_error}} + +inputs: + # Output of registering component + registration_details_folder: + type: uri_folder + optional: true + description: Folder containing model registration details in a JSON file named model_registration_details.json + + model_id: + type: string + optional: true + description: | + Asset ID of the model registered in workspace/registry. + Registry - azureml://registries//models//versions/ + Workspace - azureml:: + + inference_payload: + type: uri_file + optional: true + description: JSON payload which would be used to validate deployment + + inference_payload_str: + type: string + optional: true + description: Serialized JSON payload which would be used to validate deployment + + endpoint_name: + type: string + optional: true + description: Name of the endpoint + + deployment_name: + type: string + optional: true + default: default + description: Name of the deployment + + instance_type: + type: string + optional: true + enum: + - Standard_DS1_v2 + - Standard_DS2_v2 + - Standard_DS3_v2 + - Standard_DS4_v2 + - Standard_DS5_v2 + - Standard_F2s_v2 + - Standard_F4s_v2 + - Standard_F8s_v2 + - Standard_F16s_v2 + - Standard_F32s_v2 + - Standard_F48s_v2 + - Standard_F64s_v2 + - Standard_F72s_v2 + - Standard_FX24mds + - Standard_FX36mds + - Standard_FX48mds + - Standard_E2s_v3 + - Standard_E4s_v3 + - Standard_E8s_v3 + - Standard_E16s_v3 + - Standard_E32s_v3 + - Standard_E48s_v3 + - Standard_E64s_v3 + - Standard_NC4as_T4_v3 + - Standard_NC6s_v2 + - Standard_NC6s_v3 + - Standard_NC8as_T4_v3 + - Standard_NC12s_v2 + - Standard_NC12s_v3 + - Standard_NC16as_T4_v3 + - Standard_NC24s_v2 + - Standard_NC24s_v3 + - Standard_NC24rs_v3 + - Standard_NC24ads_A100_v4 + - Standard_NC48ads_A100_v4 + - Standard_NC96ads_A100_v4 + - Standard_NC64as_T4_v3 + - Standard_ND40rs_v2 + - Standard_ND96asr_v4 + - Standard_ND96amsr_A100_v4 + default: Standard_NC24s_v3 + description: Compute instance type to deploy model. Make sure that instance type is available and have enough quota available. + + instance_count: + type: integer + optional: true + default: 1 + description: Number of instances you want to use for deployment. Make sure instance type have enough quota available. + + max_concurrent_requests_per_instance: + type: integer + default: 1 + optional: true + description: Maximum concurrent requests to be handled per instance + + request_timeout_ms: + type: integer + default: 60000 + optional: true + description: Request timeout in ms. Max limit is 90000. + + max_queue_wait_ms: + type: integer + default: 60000 + optional: true + description: Maximum queue wait time of a request in ms + + failure_threshold_readiness_probe: + type: integer + default: 10 + optional: true + description: The number of times system will try after failing the readiness probe + + success_threshold_readiness_probe: + type: integer + default: 1 + optional: true + description: The minimum consecutive successes for the readiness probe to be considered successful after having failed + + timeout_readiness_probe: + type: integer + default: 10 + optional: true + description: The number of seconds after which the readiness probe times out + + period_readiness_probe: + type: integer + default: 10 + optional: true + description: How often (in seconds) to perform the readiness probe + + initial_delay_readiness_probe: + type: integer + default: 10 + optional: true + description: The number of seconds after the container has started before the readiness probe is initiated + + failure_threshold_liveness_probe: + type: integer + default: 30 + optional: true + description: The number of times system will try after failing the liveness probe + + timeout_liveness_probe: + type: integer + default: 10 + optional: true + description: The number of seconds after which the liveness probe times out + + period_liveness_probe: + type: integer + default: 10 + optional: true + description: How often (in seconds) to perform the liveness probe + + initial_delay_liveness_probe: + type: integer + default: 10 + optional: true + description: The number of seconds after the container has started before the liveness probe is initiated + + egress_public_network_access: + type: string + default: enabled + optional: true + enum: + - enabled + - disabled + description: Setting it to disabled secures the deployment by restricting communication between the deployment and the Azure resources used by it + +outputs: + model_deployment_details: + type: uri_file + description: Json file to which deployment details will be written + model_inference_response: + type: uri_file + description: JSON file containing inference results + deploy_error: + type: uri_file + description: File containing error messages or stack traces from the validation step. + +tags: + Preview: "" + Internal: "" diff --git a/assets/common/src/deploy_inference_model.py b/assets/common/src/deploy_inference_model.py new file mode 100644 index 0000000000..7230258fe5 --- /dev/null +++ b/assets/common/src/deploy_inference_model.py @@ -0,0 +1,417 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Run Model deployment module.""" +import argparse +import json +import re +import time +import base64 +import traceback + +from azure.ai.ml.entities import ( + ManagedOnlineEndpoint, + ManagedOnlineDeployment, + OnlineRequestSettings, + ProbeSettings, +) +from azureml._common._error_definition import AzureMLError +from azureml._common.exceptions import AzureMLException +from pathlib import Path + +from utils.config import AppName, ComponentVariables +from utils.common_utils import get_mlclient, get_model_name +from utils.logging_utils import custom_dimensions, get_logger +from utils.exceptions import ( + swallow_all_exceptions, + OnlineEndpointInvocationError, + EndpointCreationError, + DeploymentCreationError, +) + + +MAX_REQUEST_TIMEOUT = 90000 +MAX_INSTANCE_COUNT = 20 +MAX_DEPLOYMENT_LOG_TAIL_LINES = 10000 + +logger = get_logger(__name__) +custom_dimensions.app_name = AppName.DEPLOY_MODEL + + +def parse_args(): + """Return arguments.""" + parser = argparse.ArgumentParser() + + # Defaults for managed online endpoint has been picked mostly from: + # https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-deployment-managed-online + # Some of the defaults have been tweaked to cater to large models. + + # add arguments + parser.add_argument( + "--registration_details_folder", + type=Path, + help="Folder containing model registration details in a JSON file named model_registration_details.json", + ) + parser.add_argument( + "--model_id", + type=str, + help="Registered mlflow model id", + ) + parser.add_argument( + "--inference_payload", + type=Path, + help="Json file with inference endpoint payload.", + ) + parser.add_argument( + "--inference_payload_str", + type=str, + help="Serialized JSON payload for inference.", + ) + parser.add_argument( + "--endpoint_name", + type=str, + help="Name of the endpoint", + ) + parser.add_argument("--deployment_name", type=str, help="Name of the the deployment") + parser.add_argument( + "--instance_type", + type=str, + help="Compute instance type to deploy model", + default="Standard_NC24s_v3", + ) + parser.add_argument( + "--instance_count", + type=int, + help="Number of compute instances to deploy model", + default=1, + choices=range(1, MAX_INSTANCE_COUNT), + ) + parser.add_argument( + "--max_concurrent_requests_per_instance", + type=int, + default=1, + help="Maximum concurrent requests to be handled per instance", + ) + parser.add_argument( + "--request_timeout_ms", + type=int, + default=60000, # 1min + help="Request timeout in ms.", + ) + parser.add_argument( + "--max_queue_wait_ms", + type=int, + default=60000, # 1min + help="Maximum queue wait time of a request in ms", + ) + parser.add_argument( + "--failure_threshold_readiness_probe", + type=int, + default=10, + help="No of times system will try after failing the readiness probe", + ) + parser.add_argument( + "--success_threshold_readiness_probe", + type=int, + default=1, + help="The minimum consecutive successes for the readiness probe to be considered successful, after fail", + ) + parser.add_argument( + "--timeout_readiness_probe", + type=int, + default=10, + help="The number of seconds after which the readiness probe times out", + ) + parser.add_argument( + "--period_readiness_probe", + type=int, + default=10, + help="How often (in seconds) to perform the readiness probe", + ) + parser.add_argument( + "--initial_delay_readiness_probe", + type=int, + default=10, + help="The number of seconds after the container has started before the readiness probe is initiated", + ) + parser.add_argument( + "--failure_threshold_liveness_probe", + type=int, + default=30, + help="No of times system will try after failing the liveness probe", + ) + parser.add_argument( + "--timeout_liveness_probe", + type=int, + default=10, + help="The number of seconds after which the liveness probe times out", + ) + parser.add_argument( + "--period_liveness_probe", + type=int, + default=10, + help="How often (in seconds) to perform the liveness probe", + ) + parser.add_argument( + "--initial_delay_liveness_probe", + type=int, + default=10, + help="The number of seconds after the container has started before the liveness probe is initiated", + ) + parser.add_argument( + "--egress_public_network_access", + type=str, + default="enabled", + help="Secures the deployment by restricting interaction between deployment and Azure resources used by it", + ) + parser.add_argument( + "--model_deployment_details", + type=str, + help="Json file to which deployment details will be written", + ) + parser.add_argument( + "--model_inference_response", + type=str, + help="Path to the inference response JSON file.", + ) + parser.add_argument( + "--deploy_error", + type=str, + help="Path to the inference response JSON file.", + ) + # parse args + args = parser.parse_args() + logger.info(f"Args received {args}") + print("args received ", args) + + # Validating passed input values + if args.max_concurrent_requests_per_instance < 1: + raise Exception("Arg max_concurrent_requests_per_instance cannot be less than 1") + if args.request_timeout_ms < 1 or args.request_timeout_ms > MAX_REQUEST_TIMEOUT: + raise Exception(f"Arg request_timeout_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}") + if args.max_queue_wait_ms < 1 or args.max_queue_wait_ms > MAX_REQUEST_TIMEOUT: + raise Exception(f"Arg max_queue_wait_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}") + + return args + + +def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args): + """Create endpoint and deployment and return details.""" + endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token") + + # deployment + deployment = ManagedOnlineDeployment( + name=deployment_name, + endpoint_name=endpoint_name, + model=model_id, + instance_type=args.instance_type, + instance_count=args.instance_count, + request_settings=OnlineRequestSettings( + max_concurrent_requests_per_instance=args.max_concurrent_requests_per_instance, + request_timeout_ms=args.request_timeout_ms, + max_queue_wait_ms=args.max_queue_wait_ms, + ), + liveness_probe=ProbeSettings( + failure_threshold=args.failure_threshold_liveness_probe, + timeout=args.timeout_liveness_probe, + period=args.period_liveness_probe, + initial_delay=args.initial_delay_liveness_probe, + ), + readiness_probe=ProbeSettings( + failure_threshold=args.failure_threshold_readiness_probe, + success_threshold=args.success_threshold_readiness_probe, + timeout=args.timeout_readiness_probe, + period=args.period_readiness_probe, + initial_delay=args.initial_delay_readiness_probe, + ), + egress_public_network_access=args.egress_public_network_access, + ) + + try: + logger.info(f"Creating endpoint {endpoint_name}") + ml_client.begin_create_or_update(endpoint).wait() + endpoint = ml_client.online_endpoints.get(endpoint.name) + logger.info(f"Endpoint created {endpoint.id}") + except Exception as e: + raise AzureMLException._with_error( + AzureMLError.create(EndpointCreationError, exception=e) + ) + + try: + logger.info(f"Creating deployment {deployment}") + ml_client.online_deployments.begin_create_or_update(deployment).wait() + except Exception as e: + try: + logger.error("Deployment failed. Printing deployment logs") + logs = ml_client.online_deployments.get_logs( + name=deployment_name, + endpoint_name=endpoint_name, + lines=MAX_DEPLOYMENT_LOG_TAIL_LINES + ) + logger.error(logs) + except Exception as ex: + logger.error(f"Error in fetching deployment logs: {ex}") + + raise AzureMLException._with_error( + AzureMLError.create(DeploymentCreationError, exception=e) + ) + + logger.info(f"Deployment successful. Updating endpoint to take 100% traffic for deployment {deployment_name}") + + # deployment to take 100% traffic + endpoint.traffic = {deployment.name: 100} + try: + ml_client.begin_create_or_update(endpoint).wait() + endpoint = ml_client.online_endpoints.get(endpoint.name) + except Exception as e: + error_msg = f"Error occured while updating endpoint traffic. Deployment should be usable. Exception - {e}" + raise Exception(error_msg) + + logger.info(f"Endpoint updated to take 100% traffic for deployment {deployment_name}") + return endpoint, deployment + + +@swallow_all_exceptions(logger) +def main(): + """Run main function.""" + try: + args = parse_args() + logger.info(f"Arguments: {args}") + ml_client = get_mlclient() + error_message = "" + if args.model_deployment_details: + with open(args.model_deployment_details, "w") as outfile: + json.dump({}, outfile) + + if args.model_inference_response: + with open(args.model_inference_response, "w") as f: + json.dump({}, f, indent=4) + + if args.deploy_error: + with open(args.deploy_error, "w") as error_file: + error_file.write(error_message) + + # get registered model id + if args.model_id: + model_id = str(args.model_id) + elif args.registration_details_folder: + registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE + if registration_details_file.exists(): + try: + with open(registration_details_file) as f: + model_info = json.load(f) + model_id = model_info["id"] + except Exception as e: + raise Exception(f"model_registration_details json file is missing model information {e}.") + else: + raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.") + else: + raise Exception("Arguments model_id and registration_details both are missing.") + + # Endpoint has following restrictions: + # 1. Name must begin with lowercase letter + # 2. Followed by lowercase letters, hyphen or numbers + # 3. End with a lowercase letter or number + + # 1. Replace underscores and slashes by hyphens and convert them to lower case. + # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it + model_name = get_model_name(model_id) + + endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21] + endpoint_name = f"{endpoint_name}-{int(time.time())}" + endpoint_name = endpoint_name + + endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name + deployment_name = args.deployment_name if args.deployment_name else "default" + + endpoint, deployment = create_endpoint_and_deployment( + ml_client=ml_client, + endpoint_name=endpoint_name, + deployment_name=deployment_name, + model_id=model_id, + args=args + ) + + response = None + if args.inference_payload or args.inference_payload_str: + print("Invoking inference with test payload ...") + try: + start_time = time.time() + if args.inference_payload_str: + print(f"Inference payload string: {args.inference_payload_str}") + decoded_bytes = base64.b64decode(args.inference_payload_str) + + # Convert bytes to string + decoded_str = decoded_bytes.decode('utf-8') + logger.info(f"Decoded string: {decoded_str}") + + payload = json.loads(decoded_str) + logger.info(f"Payload:\n {payload}") + + with open("payload.json", "w") as temp_file: + json.dump(payload, temp_file) + + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file="payload.json", + ) + elif args.inference_payload: + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file=args.inference_payload, + ) + + end_time = time.time() + inference_time_ms = int((end_time - start_time) * 1000) + + logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " + + f"and response: {response}") + # Save inference response + if args.model_inference_response: + inference_result = { + "response": response, + "inference_time": inference_time_ms + } + with open(args.model_inference_response, "w") as f: + json.dump(inference_result, f, indent=4) + logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}") + except Exception as e: + raise AzureMLException._with_error( + AzureMLError.create(OnlineEndpointInvocationError, exception=e) + ) + + print("Saving deployment details ...") + + # write deployment details to file + endpoint_type = "aml_online_inference" + deployment_details = { + "endpoint_name": endpoint.name, + "deployment_name": deployment.name, + "endpoint_uri": endpoint.__dict__["_scoring_uri"], + "endpoint_type": endpoint_type, + "instance_type": args.instance_type, + "instance_count": args.instance_count, + "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance, + } + json_object = json.dumps(deployment_details, indent=4) + with open(args.model_deployment_details, "w") as outfile: + outfile.write(json_object) + logger.info("Saved deployment details in output json file.") + + except Exception as e: + # Capture the full traceback + stack_trace = traceback.format_exc() + error_message = f"Model deployment failed.\n{stack_trace}" + logger.error(f"error_message: {error_message}, deploy_error_path: {args.deploy_error}") + + # Write the error message to the specified error output file + if args.deploy_error: + with open(args.deploy_error, "w") as error_file: + error_file.write(error_message) + + +if __name__ == "__main__": + # run main function + main() diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index 731b6cb4be..b754f283e3 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -4,7 +4,7 @@ type: pipeline name: validate_model_inference display_name: Validate Model Inference description: deploy a model and validate it using a sample payload -version: 0.0.1.76 +version: 0.0.1 inputs: compute: @@ -136,7 +136,7 @@ outputs: jobs: online_deployment_model: type: command - component: azureml://registries/azureml-preview-test1/components/deploy_model/versions/0.0.12.29 + component: azureml:deploy_inference_model:0.0.1 compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} @@ -155,7 +155,7 @@ jobs: run_inference_validation: type: command - component: azureml://registries/azureml-preview-test1/components/run_inference_validation/versions/0.0.1.66 + component: azureml:run_inference_validation:0.0.1 inputs: validation_id: ${{parent.inputs.validation_id}} sku: ${{parent.inputs.instance_type}} @@ -170,14 +170,14 @@ jobs: delete_endpoints: type: command - component: azureml://registries/azureml-preview-test1/components/delete_endpoint/versions/0.0.7.1 + component: azureml:delete_endpoint:0.0.7 inputs: model_deployment_details: ${{parent.jobs.online_deployment_model.outputs.model_deployment_details}} endpoint_name: ${{parent.inputs.endpoint_name}} publish_results: type: command - component: azureml://registries/azureml-preview-test1/components/publish_validation_results_selfserve/versions/0.0.1.23 + component: azureml:publish_validation_results_selfserve:0.0.1 inputs: publisher_name: ${{parent.inputs.publisher_name}} model_name: ${{parent.inputs.model_name}} diff --git a/assets/training/model_management/src/run_inference_validation.py b/assets/training/model_management/src/run_inference_validation.py index 39164a0eb4..afc9df212f 100644 --- a/assets/training/model_management/src/run_inference_validation.py +++ b/assets/training/model_management/src/run_inference_validation.py @@ -294,7 +294,7 @@ def run_inference_validation(): deployment_error = f.read().strip() error_message += deployment_error except Exception as e: - logger.warning(f"Failed to read validation_error file: {e}") + logger.warning(f"Failed to read deployment_error file: {e}") if args.validation_error: with open(args.validation_error, "w") as error_file: @@ -331,7 +331,7 @@ def run_inference_validation(): try: inference_response = json.loads(inference_response) except json.JSONDecodeError as e: - logger.warning(f"Failed to parse actualResponse as JSON: {e}") + logger.warning(f"Failed to parse actual response as JSON: {e}") if inference_response is None: logger.warning("Actual response is missing or invalid. Setting it to an empty structure.") @@ -385,7 +385,6 @@ def run_inference_validation(): if args.validation_error: with open(args.validation_error, "w") as error_file: error_file.write(error_message) - # raise Exception(f"Failed to run inference validation: {error_message}") def main(): run_inference_validation() From b301dddbb01fb93ea732e59c937bb685b7dfe17d Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Mon, 19 May 2025 10:32:31 +0000 Subject: [PATCH 27/29] reverted deploy_model component --- assets/common/src/deploy.py | 227 ++++++++++++------------------------ 1 file changed, 74 insertions(+), 153 deletions(-) diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index 204b04623e..c0558feebe 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -6,8 +6,6 @@ import json import re import time -import base64 -import traceback from azure.ai.ml.entities import ( ManagedOnlineEndpoint, @@ -62,11 +60,6 @@ def parse_args(): type=Path, help="Json file with inference endpoint payload.", ) - parser.add_argument( - "--inference_payload_str", - type=str, - help="Serialized JSON payload for inference.", - ) parser.add_argument( "--endpoint_name", type=str, @@ -169,16 +162,6 @@ def parse_args(): type=str, help="Json file to which deployment details will be written", ) - parser.add_argument( - "--model_inference_response", - type=str, - help="Path to the inference response JSON file.", - ) - parser.add_argument( - "--deploy_error", - type=str, - help="Path to the inference response JSON file.", - ) # parse args args = parser.parse_args() logger.info(f"Args received {args}") @@ -197,7 +180,7 @@ def parse_args(): def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args): """Create endpoint and deployment and return details.""" - endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token") + endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="key") # deployment deployment = ManagedOnlineDeployment( @@ -274,147 +257,85 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen @swallow_all_exceptions(logger) def main(): """Run main function.""" - try: - args = parse_args() - logger.info(f"Arguments: {args}") - ml_client = get_mlclient() - error_message = "" - if args.model_deployment_details: - with open(args.model_deployment_details, "w") as outfile: - json.dump({}, outfile) - - if args.model_inference_response: - with open(args.model_inference_response, "w") as f: - json.dump({}, f, indent=4) - - if args.deploy_error: - with open(args.deploy_error, "w") as error_file: - error_file.write(error_message) - - # get registered model id - if args.model_id: - model_id = str(args.model_id) - elif args.registration_details_folder: - registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE - if registration_details_file.exists(): - try: - with open(registration_details_file) as f: - model_info = json.load(f) - model_id = model_info["id"] - except Exception as e: - raise Exception(f"model_registration_details json file is missing model information {e}.") - else: - raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.") - else: - raise Exception("Arguments model_id and registration_details both are missing.") - - # Endpoint has following restrictions: - # 1. Name must begin with lowercase letter - # 2. Followed by lowercase letters, hyphen or numbers - # 3. End with a lowercase letter or number - - # 1. Replace underscores and slashes by hyphens and convert them to lower case. - # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it - model_name = get_model_name(model_id) - - endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21] - endpoint_name = f"{endpoint_name}-{int(time.time())}" - endpoint_name = endpoint_name - - endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name - deployment_name = args.deployment_name if args.deployment_name else "default" - - endpoint, deployment = create_endpoint_and_deployment( - ml_client=ml_client, - endpoint_name=endpoint_name, - deployment_name=deployment_name, - model_id=model_id, - args=args - ) - - response = None - if args.inference_payload or args.inference_payload_str: - print("Invoking inference with test payload ...") + args = parse_args() + ml_client = get_mlclient() + # get registered model id + + if args.model_id: + model_id = str(args.model_id) + elif args.registration_details_folder: + registration_details_file = args.registration_details_folder/ComponentVariables.REGISTRATION_DETAILS_JSON_FILE + if registration_details_file.exists(): try: - start_time = time.time() - if args.inference_payload_str: - print(f"Inference payload string: {args.inference_payload_str}") - decoded_bytes = base64.b64decode(args.inference_payload_str) - - # Convert bytes to string - decoded_str = decoded_bytes.decode('utf-8') - logger.info(f"Decoded string: {decoded_str}") - - payload = json.loads(decoded_str) - logger.info(f"Payload:\n {payload}") - - with open("payload.json", "w") as temp_file: - json.dump(payload, temp_file) - - response = ml_client.online_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - request_file="payload.json", - ) - elif args.inference_payload: - response = ml_client.online_endpoints.invoke( - endpoint_name=endpoint_name, - deployment_name=deployment_name, - request_file=args.inference_payload, - ) - - end_time = time.time() - inference_time_ms = int((end_time - start_time) * 1000) - - logger.info(f"Endpoint invoked successfully with inference time :{inference_time_ms} ms " + - f"and response: {response}") - # Save inference response - if args.model_inference_response: - inference_result = { - "response": response, - "inference_time": inference_time_ms - } - with open(args.model_inference_response, "w") as f: - json.dump(inference_result, f, indent=4) - logger.info(f"Saved inference response and inference time to output JSON file: {inference_result}") + with open(registration_details_file) as f: + model_info = json.load(f) + model_id = model_info["id"] except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(OnlineEndpointInvocationError, exception=e) - ) - - print("Saving deployment details ...") - - # write deployment details to file - endpoint_type = "aml_online_inference" - deployment_details = { - "endpoint_name": endpoint.name, - "deployment_name": deployment.name, - "endpoint_uri": endpoint.__dict__["_scoring_uri"], - "endpoint_type": endpoint_type, - "instance_type": args.instance_type, - "instance_count": args.instance_count, - "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance, - } - json_object = json.dumps(deployment_details, indent=4) - with open(args.model_deployment_details, "w") as outfile: - outfile.write(json_object) - logger.info("Saved deployment details in output json file.") + raise Exception(f"model_registration_details json file is missing model information {e}.") + else: + raise Exception(f"{ComponentVariables.REGISTRATION_DETAILS_JSON_FILE} is missing inside folder.") + else: + raise Exception("Arguments model_id and registration_details both are missing.") - except Exception as e: - # Capture the full traceback - stack_trace = traceback.format_exc() - error_message = f"Model deployment failed.\n{stack_trace}" - logger.error(f"error_message from stack trace: {error_message}, deploy_error_path: {args.deploy_error}") + # Endpoint has following restrictions: + # 1. Name must begin with lowercase letter + # 2. Followed by lowercase letters, hyphen or numbers + # 3. End with a lowercase letter or number - # Write the error message to the specified error output file - if args.deploy_error: - with open(args.deploy_error, "w") as error_file: - error_file.write(error_message) + # 1. Replace underscores and slashes by hyphens and convert them to lower case. + # 2. Take 21 chars from model name and append '-' & timstamp(10chars) to it + model_name = get_model_name(model_id) - # Re-raise the exception with the full traceback - # raise Exception(error_message) + endpoint_name = re.sub("[^A-Za-z0-9]", "-", model_name).lower()[:21] + endpoint_name = f"{endpoint_name}-{int(time.time())}" + endpoint_name = endpoint_name + endpoint_name = args.endpoint_name if args.endpoint_name else endpoint_name + deployment_name = args.deployment_name if args.deployment_name else "default" + + endpoint, deployment = create_endpoint_and_deployment( + ml_client=ml_client, + endpoint_name=endpoint_name, + deployment_name=deployment_name, + model_id=model_id, + args=args + ) + + if args.inference_payload: + print("Invoking inference with test payload ...") + try: + response = ml_client.online_endpoints.invoke( + endpoint_name=endpoint_name, + deployment_name=deployment_name, + request_file=args.inference_payload, + ) + print(f"Response:\n{response}") + logger.info(f"Endpoint invoked successfully with response :{response}") + except Exception as e: + raise AzureMLException._with_error( + AzureMLError.create(OnlineEndpointInvocationError, exception=e) + ) + print("Saving deployment details ...") + + # write deployment details to file + endpoint_type = "aml_online_inference" + deployment_details = { + "endpoint_name": endpoint.name, + "deployment_name": deployment.name, + "endpoint_uri": endpoint.__dict__["_scoring_uri"], + "endpoint_type": endpoint_type, + "instance_type": args.instance_type, + "instance_count": args.instance_count, + "max_concurrent_requests_per_instance": args.max_concurrent_requests_per_instance, + } + json_object = json.dumps(deployment_details, indent=4) + with open(args.model_deployment_details, "w") as outfile: + outfile.write(json_object) + logger.info("Saved deployment details in output json file.") + + +# run script if __name__ == "__main__": # run main function - main() + main() \ No newline at end of file From 992f9499bc70b6e13b62a8a534493c57254ec2a9 Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Mon, 19 May 2025 12:14:33 +0000 Subject: [PATCH 28/29] reverted deploy_model component --- .../common/components/deploy_model/spec.yaml | 225 ------------------ 1 file changed, 225 deletions(-) delete mode 100644 assets/common/components/deploy_model/spec.yaml diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml deleted file mode 100644 index f4678e2426..0000000000 --- a/assets/common/components/deploy_model/spec.yaml +++ /dev/null @@ -1,225 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: deploy_model -version: 0.0.12.29 -type: command - -is_deterministic: True - -display_name: Deploy model -description: - Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. - -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/29 - -code: ../../src -command: >- - python deploy.py - $[[--registration_details_folder ${{inputs.registration_details_folder}}]] - $[[--model_id ${{inputs.model_id}}]] - $[[--inference_payload ${{inputs.inference_payload}}]] - $[[--inference_payload_str ${{inputs.inference_payload_str}}]] - $[[--endpoint_name ${{inputs.endpoint_name}}]] - $[[--deployment_name ${{inputs.deployment_name}}]] - $[[--instance_type ${{inputs.instance_type}}]] - $[[--instance_count ${{inputs.instance_count}}]] - $[[--max_concurrent_requests_per_instance ${{inputs.max_concurrent_requests_per_instance}}]] - $[[--request_timeout_ms ${{inputs.request_timeout_ms}}]] - $[[--max_queue_wait_ms ${{inputs.max_queue_wait_ms}}]] - $[[--failure_threshold_readiness_probe ${{inputs.failure_threshold_readiness_probe}}]] - $[[--success_threshold_readiness_probe ${{inputs.success_threshold_readiness_probe}}]] - $[[--timeout_readiness_probe ${{inputs.timeout_readiness_probe}}]] - $[[--period_readiness_probe ${{inputs.period_readiness_probe}}]] - $[[--initial_delay_readiness_probe ${{inputs.initial_delay_readiness_probe}}]] - $[[--failure_threshold_liveness_probe ${{inputs.failure_threshold_liveness_probe}}]] - $[[--timeout_liveness_probe ${{inputs.timeout_liveness_probe}}]] - $[[--period_liveness_probe ${{inputs.period_liveness_probe}}]] - $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]] - $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]] - --model_deployment_details ${{outputs.model_deployment_details}} - --model_inference_response ${{outputs.model_inference_response}} - --deploy_error ${{outputs.deploy_error}} - -inputs: - # Output of registering component - registration_details_folder: - type: uri_folder - optional: true - description: Folder containing model registration details in a JSON file named model_registration_details.json - - model_id: - type: string - optional: true - description: | - Asset ID of the model registered in workspace/registry. - Registry - azureml://registries//models//versions/ - Workspace - azureml:: - - inference_payload: - type: uri_file - optional: true - description: JSON payload which would be used to validate deployment - - inference_payload_str: - type: string - optional: true - description: Serialized JSON payload which would be used to validate deployment - - endpoint_name: - type: string - optional: true - description: Name of the endpoint - - deployment_name: - type: string - optional: true - default: default - description: Name of the deployment - - instance_type: - type: string - optional: true - enum: - - Standard_DS1_v2 - - Standard_DS2_v2 - - Standard_DS3_v2 - - Standard_DS4_v2 - - Standard_DS5_v2 - - Standard_F2s_v2 - - Standard_F4s_v2 - - Standard_F8s_v2 - - Standard_F16s_v2 - - Standard_F32s_v2 - - Standard_F48s_v2 - - Standard_F64s_v2 - - Standard_F72s_v2 - - Standard_FX24mds - - Standard_FX36mds - - Standard_FX48mds - - Standard_E2s_v3 - - Standard_E4s_v3 - - Standard_E8s_v3 - - Standard_E16s_v3 - - Standard_E32s_v3 - - Standard_E48s_v3 - - Standard_E64s_v3 - - Standard_NC4as_T4_v3 - - Standard_NC6s_v2 - - Standard_NC6s_v3 - - Standard_NC8as_T4_v3 - - Standard_NC12s_v2 - - Standard_NC12s_v3 - - Standard_NC16as_T4_v3 - - Standard_NC24s_v2 - - Standard_NC24s_v3 - - Standard_NC24rs_v3 - - Standard_NC24ads_A100_v4 - - Standard_NC48ads_A100_v4 - - Standard_NC96ads_A100_v4 - - Standard_NC64as_T4_v3 - - Standard_ND40rs_v2 - - Standard_ND96asr_v4 - - Standard_ND96amsr_A100_v4 - default: Standard_NC24s_v3 - description: Compute instance type to deploy model. Make sure that instance type is available and have enough quota available. - - instance_count: - type: integer - optional: true - default: 1 - description: Number of instances you want to use for deployment. Make sure instance type have enough quota available. - - max_concurrent_requests_per_instance: - type: integer - default: 1 - optional: true - description: Maximum concurrent requests to be handled per instance - - request_timeout_ms: - type: integer - default: 60000 - optional: true - description: Request timeout in ms. Max limit is 90000. - - max_queue_wait_ms: - type: integer - default: 60000 - optional: true - description: Maximum queue wait time of a request in ms - - failure_threshold_readiness_probe: - type: integer - default: 10 - optional: true - description: The number of times system will try after failing the readiness probe - - success_threshold_readiness_probe: - type: integer - default: 1 - optional: true - description: The minimum consecutive successes for the readiness probe to be considered successful after having failed - - timeout_readiness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after which the readiness probe times out - - period_readiness_probe: - type: integer - default: 10 - optional: true - description: How often (in seconds) to perform the readiness probe - - initial_delay_readiness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after the container has started before the readiness probe is initiated - - failure_threshold_liveness_probe: - type: integer - default: 30 - optional: true - description: The number of times system will try after failing the liveness probe - - timeout_liveness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after which the liveness probe times out - - period_liveness_probe: - type: integer - default: 10 - optional: true - description: How often (in seconds) to perform the liveness probe - - initial_delay_liveness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after the container has started before the liveness probe is initiated - - egress_public_network_access: - type: string - default: enabled - optional: true - enum: - - enabled - - disabled - description: Setting it to disabled secures the deployment by restricting communication between the deployment and the Azure resources used by it - -outputs: - model_deployment_details: - type: uri_file - description: Json file to which deployment details will be written - model_inference_response: - type: uri_file - description: JSON file containing inference results - deploy_error: - type: uri_file - description: File containing error messages or stack traces from the validation step. - -tags: - Preview: "" - Internal: "" From b1c61c892900b9be33a1bc97daccd13be0d172de Mon Sep 17 00:00:00 2001 From: abhishekMS2024 Date: Thu, 22 May 2025 09:37:17 +0000 Subject: [PATCH 29/29] Maap Self serve validation AML pipeline custom model support --- .../components/deploy_inference_model/spec.yaml | 9 +++++++++ assets/common/src/deploy_inference_model.py | 14 +++++++++++++- .../publish_validation_results_selfserve/spec.yaml | 4 ++-- .../components/run_inference_validation/spec.yaml | 4 ++-- .../components/validate_model_inference/spec.yaml | 9 +++++++++ 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/assets/common/components/deploy_inference_model/spec.yaml b/assets/common/components/deploy_inference_model/spec.yaml index efae05fea4..407c0a2416 100644 --- a/assets/common/components/deploy_inference_model/spec.yaml +++ b/assets/common/components/deploy_inference_model/spec.yaml @@ -16,6 +16,7 @@ command: >- python deploy_inference_model.py $[[--registration_details_folder ${{inputs.registration_details_folder}}]] $[[--model_id ${{inputs.model_id}}]] + $[[--environment_id ${{inputs.environment_id}}]] $[[--inference_payload ${{inputs.inference_payload}}]] $[[--inference_payload_str ${{inputs.inference_payload_str}}]] $[[--endpoint_name ${{inputs.endpoint_name}}]] @@ -54,6 +55,14 @@ inputs: Registry - azureml://registries//models//versions/ Workspace - azureml:: + environment_id: + type: string + optional: true + description: | + Asset ID of the environment registered in workspace/registry. + Registry - azureml://registries//environments//versions/ + Workspace - azureml:: + inference_payload: type: uri_file optional: true diff --git a/assets/common/src/deploy_inference_model.py b/assets/common/src/deploy_inference_model.py index 7230258fe5..0fa2f68be7 100644 --- a/assets/common/src/deploy_inference_model.py +++ b/assets/common/src/deploy_inference_model.py @@ -57,6 +57,12 @@ def parse_args(): type=str, help="Registered mlflow model id", ) + parser.add_argument( + "--environment_id", + type=str, + required=False, + help="AzureML environment ID to use for deployment", + ) parser.add_argument( "--inference_payload", type=Path, @@ -195,7 +201,7 @@ def parse_args(): return args -def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deployment_name, args): +def create_endpoint_and_deployment(ml_client, model_id, environment_id, endpoint_name, deployment_name, args): """Create endpoint and deployment and return details.""" endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="aad_token") @@ -204,6 +210,7 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen name=deployment_name, endpoint_name=endpoint_name, model=model_id, + environment=environment_id, instance_type=args.instance_type, instance_count=args.instance_count, request_settings=OnlineRequestSettings( @@ -278,6 +285,7 @@ def main(): args = parse_args() logger.info(f"Arguments: {args}") ml_client = get_mlclient() + error_message = "" if args.model_deployment_details: with open(args.model_deployment_details, "w") as outfile: @@ -291,6 +299,9 @@ def main(): with open(args.deploy_error, "w") as error_file: error_file.write(error_message) + # get environment id + environment_id = args.environment_id if hasattr(args, "environment_id") else None + # get registered model id if args.model_id: model_id = str(args.model_id) @@ -329,6 +340,7 @@ def main(): endpoint_name=endpoint_name, deployment_name=deployment_name, model_id=model_id, + environment_id=environment_id, args=args ) diff --git a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml index 498fde2dc1..db5ab152ff 100644 --- a/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml +++ b/assets/training/model_management/components/publish_validation_results_selfserve/spec.yaml @@ -3,12 +3,12 @@ type: command is_deterministic: true name: publish_validation_results_selfserve -version: 0.0.1.23 +version: 0.0.1 display_name: Publish model validation results to Self-Serve description: | This component publishes model validation results to the Self-Serve database. -environment: azureml://registries/azureml/environments/model-management/versions/41 +environment: azureml://registries/azureml/environments/model-management/versions/47 inputs: selfserve_base_url: diff --git a/assets/training/model_management/components/run_inference_validation/spec.yaml b/assets/training/model_management/components/run_inference_validation/spec.yaml index c5e9f5cfb0..3feba65c60 100644 --- a/assets/training/model_management/components/run_inference_validation/spec.yaml +++ b/assets/training/model_management/components/run_inference_validation/spec.yaml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json name: run_inference_validation -version: 0.0.1.66 +version: 0.0.1 type: command is_deterministic: True @@ -8,7 +8,7 @@ is_deterministic: True display_name: Run Inference Validation description: Compares the expected inference response with the actual response from model deployment. -environment: azureml://registries/azureml/environments/model-management/versions/41 +environment: azureml://registries/azureml/environments/model-management/versions/47 code: ../../src command: >- diff --git a/assets/training/model_management/components/validate_model_inference/spec.yaml b/assets/training/model_management/components/validate_model_inference/spec.yaml index b754f283e3..49524c34c8 100644 --- a/assets/training/model_management/components/validate_model_inference/spec.yaml +++ b/assets/training/model_management/components/validate_model_inference/spec.yaml @@ -74,6 +74,14 @@ inputs: Registry - azureml://registries//models//versions/ Workspace - azureml:: + environment_id: + type: string + optional: false + description: | + Asset ID of the environment registered in workspace/registry. + Registry - azureml://registries//environments//versions/ + Workspace - azureml:: + model_name: type: string optional: false @@ -140,6 +148,7 @@ jobs: compute: ${{parent.inputs.compute}} inputs: model_id: ${{parent.inputs.model_id}} + environment_id: ${{parent.inputs.environment_id}} inference_payload_str: ${{parent.inputs.inference_payload}} endpoint_name: ${{parent.inputs.endpoint_name}} deployment_name: ${{parent.inputs.deployment_name}}