From 3b75a5382390ba17dbc706063998695cc8ee61ff Mon Sep 17 00:00:00 2001 From: Sarthak Singhal Date: Tue, 15 Apr 2025 18:55:35 +0530 Subject: [PATCH 1/6] Remove `azureml-core` from conda_dependencies.yaml for `python-sdk-v2` --- .../environments/python-sdk-v2/context/conda_dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/common/environments/python-sdk-v2/context/conda_dependencies.yaml b/assets/common/environments/python-sdk-v2/context/conda_dependencies.yaml index 1576bb8a85..17e49155fd 100644 --- a/assets/common/environments/python-sdk-v2/context/conda_dependencies.yaml +++ b/assets/common/environments/python-sdk-v2/context/conda_dependencies.yaml @@ -8,7 +8,7 @@ dependencies: - azure-ai-ml=={{latest-pypi-version}} - azure-identity=={{latest-pypi-version}} # v1 package for fetching run details - - azureml-core=={{latest-pypi-version}} + # - azureml-core=={{latest-pypi-version}} # v1 package for logging and telemetry. Switch to using application-insights and implementing custom logging - azureml-telemetry=={{latest-pypi-version}} - cryptography=={{latest-pypi-version}} From ff7012e3bc7f3025aef4c645303fcab590e311fe Mon Sep 17 00:00:00 2001 From: Sarthak Singhal Date: Tue, 15 Apr 2025 18:56:41 +0530 Subject: [PATCH 2/6] Migrate Exceptions for components dependent on `python-sdk-v2` - I --- assets/common/src/batch_deploy.py | 36 +++++++++++++++++++------------ assets/common/src/deploy.py | 28 ++++++++++++++---------- assets/common/src/register.py | 17 +++++++++------ 3 files changed, 50 insertions(+), 31 deletions(-) diff --git a/assets/common/src/batch_deploy.py b/assets/common/src/batch_deploy.py index 9365e802e7..530c05d571 100644 --- a/assets/common/src/batch_deploy.py +++ b/assets/common/src/batch_deploy.py @@ -15,8 +15,7 @@ BatchDeployment, BatchRetrySettings, ) -from azureml._common._error_definition import AzureMLError -from azureml._common.exceptions import AzureMLException +from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException from pathlib import Path from utils.config import AppName, ComponentVariables @@ -24,10 +23,7 @@ from utils.logging_utils import custom_dimensions, get_logger from utils.exceptions import ( swallow_all_exceptions, - BatchEndpointInvocationError, - EndpointCreationError, - DeploymentCreationError, - ComputeCreationError, + ModelImportErrorStrings, ) @@ -216,8 +212,11 @@ def invoke_endpoint_job(ml_client, endpoint, type, args): download_batch_output(ml_client, job, args) except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(BatchEndpointInvocationError, exception=e) + message = ModelImportErrorStrings.BATCH_ENDPOINT_INVOCATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT, + error=e ) @@ -239,8 +238,11 @@ def get_or_create_compute(ml_client, compute_name, args): ml_client.begin_create_or_update(compute_cluster).wait() logger.info("Compute cluster created successfully.") except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(ComputeCreationError, exception=e) + message = ModelImportErrorStrings.COMPUTE_CREATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPUTE, + error=e ) return compute_cluster @@ -272,16 +274,22 @@ def create_endpoint_and_deployment(ml_client, compute_name, model_id, endpoint_n ml_client.begin_create_or_update(endpoint).wait() logger.info("Endpoint created successfully.") except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(EndpointCreationError, exception=e) + message = ModelImportErrorStrings.ENDPOINT_CREATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT, + error=e ) try: logger.info(f"Creating deployment {deployment}") ml_client.batch_deployments.begin_create_or_update(deployment).wait() except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(DeploymentCreationError, exception=e) + message = ModelImportErrorStrings.DEPLOYMENT_CREATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.DEPLOYMENT, + error=e ) logger.info("Deployment successful.") diff --git a/assets/common/src/deploy.py b/assets/common/src/deploy.py index a4afa64493..5b70e8a62e 100644 --- a/assets/common/src/deploy.py +++ b/assets/common/src/deploy.py @@ -13,8 +13,7 @@ OnlineRequestSettings, ProbeSettings, ) -from azureml._common._error_definition import AzureMLError -from azureml._common.exceptions import AzureMLException +from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException from pathlib import Path from utils.config import AppName, ComponentVariables @@ -22,9 +21,7 @@ from utils.logging_utils import custom_dimensions, get_logger from utils.exceptions import ( swallow_all_exceptions, - OnlineEndpointInvocationError, - EndpointCreationError, - DeploymentCreationError, + ModelImportErrorStrings, ) @@ -216,8 +213,11 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen endpoint = ml_client.online_endpoints.get(endpoint.name) logger.info(f"Endpoint created {endpoint.id}") except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(EndpointCreationError, exception=e) + message = ModelImportErrorStrings.ENDPOINT_CREATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT, + error=e ) try: @@ -235,8 +235,11 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen except Exception as ex: logger.error(f"Error in fetching deployment logs: {ex}") - raise AzureMLException._with_error( - AzureMLError.create(DeploymentCreationError, exception=e) + message = ModelImportErrorStrings.DEPLOYMENT_CREATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.DEPLOYMENT, + error=e ) logger.info(f"Deployment successful. Updating endpoint to take 100% traffic for deployment {deployment_name}") @@ -312,8 +315,11 @@ def main(): print(f"Response:\n{response}") logger.info(f"Endpoint invoked successfully with response :{response}") except Exception as e: - raise AzureMLException._with_error( - AzureMLError.create(OnlineEndpointInvocationError, exception=e) + message = ModelImportErrorStrings.ONLINE_ENDPOINT_INVOCATION_ERROR + raise MlException( + message=message.format(exception=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT, + error=e ) print("Saving deployment details ...") diff --git a/assets/common/src/register.py b/assets/common/src/register.py index 7b9526ac67..993e2884ed 100644 --- a/assets/common/src/register.py +++ b/assets/common/src/register.py @@ -13,16 +13,14 @@ from pathlib import Path from azure.ai.ml.constants import AssetTypes from azure.ai.ml.entities import Model -from azureml._common._error_definition import AzureMLError -from azureml._common.exceptions import AzureMLException +from azure.ai.ml.exceptions import ValidationException, ErrorTarget, ErrorCategory from utils.common_utils import get_mlclient, get_job_asset_uri from utils.config import AppName, ComponentVariables from utils.logging_utils import custom_dimensions, get_logger from utils.exceptions import ( swallow_all_exceptions, - UnSupportedModelTypeError, - MissingModelNameError, + ModelImportErrorStrings ) @@ -139,10 +137,17 @@ def main(): # validations if model_type not in SUPPORTED_MODEL_ASSET_TYPES: - raise AzureMLException._with_error(AzureMLError.create(UnSupportedModelTypeError, model_type=model_type)) + message = ModelImportErrorStrings.UNSUPPORTED_MODEL_TYPE_ERROR.format(model_type=model_type) + raise ValidationException( + message=message, no_personal_data_message=message, target=ErrorTarget.MODEL + ) if not model_name: - raise AzureMLException._with_error(AzureMLError.create(MissingModelNameError)) + message = ModelImportErrorStrings.MISSING_MODEL_NAME_ERROR + raise ValidationException( + message=message, no_personal_data_message=message, + target=ErrorTarget.MODEL, error_category=ErrorCategory.USER_ERROR + ) if not re.match(VALID_MODEL_NAME_PATTERN, model_name): # update model name to one supported for registration From a0cd7fc8d7dfb8e1e32ef4fc82d66ddc9bdad97f Mon Sep 17 00:00:00 2001 From: Sarthak Singhal Date: Wed, 16 Apr 2025 02:19:55 +0530 Subject: [PATCH 3/6] Migrate Exceptions for components dependent on `python-sdk-v2` - II --- assets/common/src/delete_endpoint.py | 29 ++++++++++---- .../src/run_mlflow_model_local_validation.py | 38 +++++++++++++------ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/assets/common/src/delete_endpoint.py b/assets/common/src/delete_endpoint.py index 7175ffb96d..6cd9ecf489 100644 --- a/assets/common/src/delete_endpoint.py +++ b/assets/common/src/delete_endpoint.py @@ -7,8 +7,8 @@ import json from azure.ai.ml import MLClient from azure.identity import ManagedIdentityCredential +from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException from azure.ai.ml.identity import AzureMLOnBehalfOfCredential -from azureml.core import Run from pathlib import Path @@ -59,15 +59,30 @@ def get_ml_client(): try: credential.get_token("https://management.azure.com/.default") except Exception as ex: - raise (f"Failed to get credentials : {ex}") - run = Run.get_context(allow_offline=False) - ws = run.experiment.workspace + message = "Failed to get credentials : {ex}" + raise MlException( + message=message.format(ex=ex), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.IDENTITY, + error=ex + ) + + try: + subscription_id = os.environ['AZUREML_ARM_SUBSCRIPTION'] + resource_group = os.environ["AZUREML_ARM_RESOURCEGROUP"] + workspace = os.environ["AZUREML_ARM_WORKSPACE_NAME"] + except Exception as ex: + message = "Failed to get AzureML ARM env variable : {ex}" + raise MlException( + message=message.format(ex=ex), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPONENT, + error=ex + ) ml_client = MLClient( credential=credential, - subscription_id=ws._subscription_id, - resource_group_name=ws._resource_group, - workspace_name=ws._workspace_name, + subscription_id=subscription_id, + resource_group_name=resource_group, + workspace_name=workspace, ) return ml_client diff --git a/assets/common/src/run_mlflow_model_local_validation.py b/assets/common/src/run_mlflow_model_local_validation.py index 57ae72420c..4d2f028582 100644 --- a/assets/common/src/run_mlflow_model_local_validation.py +++ b/assets/common/src/run_mlflow_model_local_validation.py @@ -6,18 +6,15 @@ import argparse import os import shutil -from azureml._common._error_definition import AzureMLError -from azureml._common.exceptions import AzureMLException +from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException from pathlib import Path from utils.config import AppName -from utils.exceptions import swallow_all_exceptions from utils.logging_utils import custom_dimensions, get_logger from utils.common_utils import run_command from utils.exceptions import ( - CondaEnvCreationError, - CondaFileMissingError, - MlflowModelValidationError, + swallow_all_exceptions, + ModelImportErrorStrings, ) @@ -58,7 +55,11 @@ def run(): conda_file_path = os.path.join(model_dir, CONDA_FILE_NAME) if not os.path.exists(conda_file_path): - raise AzureMLException._with_error(AzureMLError.create(CondaFileMissingError)) + message = ModelImportErrorStrings.CONDA_FILE_MISSING_ERROR + raise MlException( + message=message, no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.MODEL + ) # copy conda.yaml to cwd shutil.copy(conda_file_path, CONDA_FILE_NAME) @@ -68,12 +69,22 @@ def run(): exit_code, stdout = run_command(CREATE_CONDA_CMD.format(ENV_PREFIX, CONDA_FILE_NAME)) if exit_code != 0: logger.warning(f"Error in creating conda env. Error details {stdout}") - raise AzureMLException._with_error(AzureMLError.create(CondaEnvCreationError)) + message = ModelImportErrorStrings.CONDA_ENV_CREATION_ERROR + raise MlException( + message=message, no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENVIRONMENT, + error=stdout + ) exit_code, stdout = run_command(CONDA_LIST.format(ENV_PREFIX)) if exit_code != 0: - logger.warning(f"Error in listing env at {ENV_PREFIX}. Error details {stdout}") - raise AzureMLException._with_error(AzureMLError.create(CondaEnvCreationError)) + message = "Error in listing env at {ENV_PREFIX}. Error details {stdout}" + logger.warning(message.format(ENV_PREFIX=ENV_PREFIX, stdout=stdout)) + raise MlException( + message=message, no_personal_data_message=message.format(ENV_PREFIX=ENV_PREFIX, stdout=stdout), + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENVIRONMENT, + error=stdout + ) logger.info(f"pip list: \n{stdout}") cmd = f"python {SCRIPT_PATH} --model-path {model_dir}" @@ -97,7 +108,12 @@ def run(): if exit_code != 0: logger.warning(f"Local validation failed. Error {stdout}") - raise AzureMLException._with_error(AzureMLError.create(MlflowModelValidationError)) + message = ModelImportErrorStrings.MLFLOW_LOCAL_VALIDATION_ERROR + raise MlException( + message=message, no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENVIRONMENT, + error=stdout + ) # copy the model to output dir shutil.copytree(src=model_dir, dst=output_model_path, dirs_exist_ok=True) From 3af1759db9df4649c146eef743dd0cee9de7e218 Mon Sep 17 00:00:00 2001 From: Sarthak Singhal Date: Tue, 22 Apr 2025 16:49:00 +0530 Subject: [PATCH 4/6] Migrate Exceptions for components dependent on `python-sdk-v2` - III --- assets/common/src/utils/common_utils.py | 50 +++++++++++++++++-------- assets/common/src/utils/exceptions.py | 10 ++++- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/assets/common/src/utils/common_utils.py b/assets/common/src/utils/common_utils.py index 40f827541e..e955f3ce86 100644 --- a/assets/common/src/utils/common_utils.py +++ b/assets/common/src/utils/common_utils.py @@ -9,16 +9,14 @@ from azure.ai.ml import MLClient from azure.ai.ml.identity import AzureMLOnBehalfOfCredential from azure.identity import ManagedIdentityCredential -from azureml._common._error_definition import AzureMLError -from azureml._common.exceptions import AzureMLException -from azureml.core.run import Run +from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException, ValidationException from pathlib import Path from subprocess import PIPE, run, STDOUT from typing import Tuple -from utils.logging_utils import get_logger -from utils.run_utils import JobRunDetails -from utils.exceptions import UserIdentityMissingError, InvalidModelIDError +from logging_utils import get_logger +from run_utils import JobRunDetails +from exceptions import ModelImportErrorStrings logger = get_logger(__name__) @@ -59,16 +57,30 @@ def get_mlclient(registry_name: str = None): # Check if given credential can get token successfully. credential.get_token("https://management.azure.com/.default") except Exception as ex: - raise AzureMLException._with_error(AzureMLError.create(UserIdentityMissingError, exception=ex)) + message = ModelImportErrorStrings.USER_IDENTITY_MISSING_ERROR + raise MlException( + message=message.format(ex=ex), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.IDENTITY, + error=ex + ) + try: + subscription_id = os.environ['AZUREML_ARM_SUBSCRIPTION'] + resource_group = os.environ["AZUREML_ARM_RESOURCEGROUP"] + workspace = os.environ["AZUREML_ARM_WORKSPACE_NAME"] + except Exception as ex: + message = "Failed to get AzureML ARM env variable : {ex}" + raise MlException( + message=message.format(ex=ex), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPONENT, + error=ex + ) if registry_name is None: - run = Run.get_context(allow_offline=False) - ws = run.experiment.workspace return MLClient( credential=credential, - subscription_id=ws._subscription_id, - resource_group_name=ws._resource_group, - workspace_name=ws._workspace_name, + subscription_id=subscription_id, + resource_group_name=resource_group, + workspace_name=workspace, ) logger.info(f"Creating MLClient with registry name {registry_name}") return MLClient(credential=credential, registry_name=registry_name) @@ -81,7 +93,11 @@ def get_model_name(model_id: str): if match: return match.group(2) or match.group(5) else: - raise AzureMLException._with_error(AzureMLError.create(InvalidModelIDError, model_id=model_id)) + message = ModelImportErrorStrings.INVALID_MODEL_ID_ERROR + raise ValidationException( + message=message.format(model_id=model_id), no_personal_data_message=message, + error_category=ErrorCategory.USER_ERROR, target=ErrorTarget.COMPONENT + ) def get_model_name_version(model_id: str): @@ -100,8 +116,12 @@ def get_model_name_version(model_id: str): logger.info(f"ws asset URI, returning {match.group(1)}, {match.group(2)}") return match.group(2) or match.group(5) - logger.info(f"Unsupported model asset uri: {model_id}") - raise AzureMLException._with_error(AzureMLError.create(InvalidModelIDError, model_id=model_id)) + message = "Unsupported model asset uri: {model_id}" + logger.info(message.format(model_id=model_id)) + raise MlException( + message=message.format(model_id=model_id), no_personal_data_message=message, + error_category=ErrorCategory.USER_ERROR, target=ErrorTarget.COMPONENT + ) def get_job_uri_from_input_run_assetId(assetID: str): diff --git a/assets/common/src/utils/exceptions.py b/assets/common/src/utils/exceptions.py index 2cb7b93ad2..8c683f2921 100644 --- a/assets/common/src/utils/exceptions.py +++ b/assets/common/src/utils/exceptions.py @@ -9,6 +9,7 @@ from azureml._common.exceptions import AzureMLException from azureml._common._error_definition.azureml_error import AzureMLError # type: ignore from azureml._common._error_definition.system_error import ClientError # type: ignore +from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException class ModelImportErrorStrings: @@ -219,10 +220,15 @@ def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except Exception as e: - if isinstance(e, AzureMLException): + if isinstance(e, MlException): azureml_exception = e else: - azureml_exception = AzureMLException._with_error(AzureMLError.create(ModelImportError, error=e)) + message = ModelImportErrorStrings.LOG_UNSAFE_GENERIC_ERROR + azureml_exception = MlException( + message=message.format(error=e), no_personal_data_message=message, + error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPONENT, + error=e + ) logger.error("Exception {} when calling {}".format(azureml_exception, func.__name__)) for handler in logger.handlers: From 7e6e81a5b0a34484ccfb59facb039750b2ed9176 Mon Sep 17 00:00:00 2001 From: Sarthak Singhal Date: Tue, 22 Apr 2025 17:16:09 +0530 Subject: [PATCH 5/6] Migrate Run utils for components dependent on `python-sdk-v2` - I --- assets/common/src/utils/run_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/assets/common/src/utils/run_utils.py b/assets/common/src/utils/run_utils.py index 370eddce2f..775cc73699 100644 --- a/assets/common/src/utils/run_utils.py +++ b/assets/common/src/utils/run_utils.py @@ -5,7 +5,9 @@ from azureml.core import Run from azureml.core.compute import ComputeTarget -from utils.config import LoggerConfig +from config import LoggerConfig +from common_utils import get_mlclient +import os class JobRunDetails: @@ -16,7 +18,8 @@ class JobRunDetails: def __init__(self): """Run details init. Should not be called directly and be instantiated via get_run_details.""" - self._run = Run.get_context() + ml_client = get_mlclient() + self._run = ml_client.jobs.get(os.environ["AZUREML_RUN_ID"]) self._details = None @staticmethod @@ -29,7 +32,7 @@ def get_run_details(): @property def run_id(self): """Run ID of the existing run.""" - return self._run.id + return self._run.name @property def parent_run_id(self): From d114f43e4d32ca3f7e17ddabd7d4bbcae8ef6867 Mon Sep 17 00:00:00 2001 From: Sarthak Singhal Date: Tue, 22 Apr 2025 17:16:41 +0530 Subject: [PATCH 6/6] Migrate logging utils for components dependent on `python-sdk-v2` - I --- assets/common/src/utils/logging_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/common/src/utils/logging_utils.py b/assets/common/src/utils/logging_utils.py index 380c6517a0..6c25dff028 100644 --- a/assets/common/src/utils/logging_utils.py +++ b/assets/common/src/utils/logging_utils.py @@ -9,7 +9,7 @@ import logging import sys -from azureml.telemetry import get_telemetry_log_handler +from azure.ai.ml._telemetry import get_appinsights_log_handler from azureml.telemetry._telemetry_formatter import ExceptionFormatter from utils.config import AppName, LoggerConfig @@ -121,9 +121,9 @@ def get_logger(name=LoggerConfig.LOGGER_NAME, level=LoggerConfig.VERBOSITY_LEVEL if LoggerConfig.APPINSIGHT_HANDLER_NAME not in handler_names: instrumentation_key = codecs.decode(LoggerConfig.INSTRUMENTATION_KEY, LoggerConfig.CODEC).decode("utf-8") - appinsights_handler = get_telemetry_log_handler( + appinsights_handler = get_appinsights_log_handler( instrumentation_key=instrumentation_key, - component_name="automl", + ... ) formatter = ExceptionFormatter(