Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:
- azure-ai-ml=={{latest-pypi-version}}
- azure-identity=={{latest-pypi-version}}
# v1 package for fetching run details
- azureml-core=={{latest-pypi-version}}
# - azureml-core=={{latest-pypi-version}}
# v1 package for logging and telemetry. Switch to using application-insights and implementing custom logging
- azureml-telemetry=={{latest-pypi-version}}
- cryptography=={{latest-pypi-version}}
Expand Down
36 changes: 22 additions & 14 deletions assets/common/src/batch_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,15 @@
BatchDeployment,
BatchRetrySettings,
)
from azureml._common._error_definition import AzureMLError
from azureml._common.exceptions import AzureMLException
from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException
from pathlib import Path

from utils.config import AppName, ComponentVariables
from utils.common_utils import get_mlclient, get_model_name
from utils.logging_utils import custom_dimensions, get_logger
from utils.exceptions import (
swallow_all_exceptions,
BatchEndpointInvocationError,
EndpointCreationError,
DeploymentCreationError,
ComputeCreationError,
ModelImportErrorStrings,
)


Expand Down Expand Up @@ -216,8 +212,11 @@ def invoke_endpoint_job(ml_client, endpoint, type, args):
download_batch_output(ml_client, job, args)

except Exception as e:
raise AzureMLException._with_error(
AzureMLError.create(BatchEndpointInvocationError, exception=e)
message = ModelImportErrorStrings.BATCH_ENDPOINT_INVOCATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT,
error=e
)


Expand All @@ -239,8 +238,11 @@ def get_or_create_compute(ml_client, compute_name, args):
ml_client.begin_create_or_update(compute_cluster).wait()
logger.info("Compute cluster created successfully.")
except Exception as e:
raise AzureMLException._with_error(
AzureMLError.create(ComputeCreationError, exception=e)
message = ModelImportErrorStrings.COMPUTE_CREATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPUTE,
error=e
)
return compute_cluster

Expand Down Expand Up @@ -272,16 +274,22 @@ def create_endpoint_and_deployment(ml_client, compute_name, model_id, endpoint_n
ml_client.begin_create_or_update(endpoint).wait()
logger.info("Endpoint created successfully.")
except Exception as e:
raise AzureMLException._with_error(
AzureMLError.create(EndpointCreationError, exception=e)
message = ModelImportErrorStrings.ENDPOINT_CREATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT,
error=e
)

try:
logger.info(f"Creating deployment {deployment}")
ml_client.batch_deployments.begin_create_or_update(deployment).wait()
except Exception as e:
raise AzureMLException._with_error(
AzureMLError.create(DeploymentCreationError, exception=e)
message = ModelImportErrorStrings.DEPLOYMENT_CREATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.DEPLOYMENT,
error=e
)

logger.info("Deployment successful.")
Expand Down
29 changes: 22 additions & 7 deletions assets/common/src/delete_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import json
from azure.ai.ml import MLClient
from azure.identity import ManagedIdentityCredential
from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
from azureml.core import Run
from pathlib import Path


Expand Down Expand Up @@ -59,15 +59,30 @@ def get_ml_client():
try:
credential.get_token("https://management.azure.com/.default")
except Exception as ex:
raise (f"Failed to get credentials : {ex}")
run = Run.get_context(allow_offline=False)
ws = run.experiment.workspace
message = "Failed to get credentials : {ex}"
raise MlException(
message=message.format(ex=ex), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.IDENTITY,
error=ex
)

try:
subscription_id = os.environ['AZUREML_ARM_SUBSCRIPTION']
resource_group = os.environ["AZUREML_ARM_RESOURCEGROUP"]
workspace = os.environ["AZUREML_ARM_WORKSPACE_NAME"]
except Exception as ex:
message = "Failed to get AzureML ARM env variable : {ex}"
raise MlException(
message=message.format(ex=ex), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPONENT,
error=ex
)

ml_client = MLClient(
credential=credential,
subscription_id=ws._subscription_id,
resource_group_name=ws._resource_group,
workspace_name=ws._workspace_name,
subscription_id=subscription_id,
resource_group_name=resource_group,
workspace_name=workspace,
)
return ml_client

Expand Down
28 changes: 17 additions & 11 deletions assets/common/src/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,15 @@
OnlineRequestSettings,
ProbeSettings,
)
from azureml._common._error_definition import AzureMLError
from azureml._common.exceptions import AzureMLException
from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException
from pathlib import Path

from utils.config import AppName, ComponentVariables
from utils.common_utils import get_mlclient, get_model_name
from utils.logging_utils import custom_dimensions, get_logger
from utils.exceptions import (
swallow_all_exceptions,
OnlineEndpointInvocationError,
EndpointCreationError,
DeploymentCreationError,
ModelImportErrorStrings,
)


Expand Down Expand Up @@ -216,8 +213,11 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen
endpoint = ml_client.online_endpoints.get(endpoint.name)
logger.info(f"Endpoint created {endpoint.id}")
except Exception as e:
raise AzureMLException._with_error(
AzureMLError.create(EndpointCreationError, exception=e)
message = ModelImportErrorStrings.ENDPOINT_CREATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT,
error=e
)

try:
Expand All @@ -235,8 +235,11 @@ def create_endpoint_and_deployment(ml_client, model_id, endpoint_name, deploymen
except Exception as ex:
logger.error(f"Error in fetching deployment logs: {ex}")

raise AzureMLException._with_error(
AzureMLError.create(DeploymentCreationError, exception=e)
message = ModelImportErrorStrings.DEPLOYMENT_CREATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.DEPLOYMENT,
error=e
)

logger.info(f"Deployment successful. Updating endpoint to take 100% traffic for deployment {deployment_name}")
Expand Down Expand Up @@ -312,8 +315,11 @@ def main():
print(f"Response:\n{response}")
logger.info(f"Endpoint invoked successfully with response :{response}")
except Exception as e:
raise AzureMLException._with_error(
AzureMLError.create(OnlineEndpointInvocationError, exception=e)
message = ModelImportErrorStrings.ONLINE_ENDPOINT_INVOCATION_ERROR
raise MlException(
message=message.format(exception=e), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENDPOINT,
error=e
)

print("Saving deployment details ...")
Expand Down
17 changes: 11 additions & 6 deletions assets/common/src/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,14 @@
from pathlib import Path
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Model
from azureml._common._error_definition import AzureMLError
from azureml._common.exceptions import AzureMLException
from azure.ai.ml.exceptions import ValidationException, ErrorTarget, ErrorCategory

from utils.common_utils import get_mlclient, get_job_asset_uri
from utils.config import AppName, ComponentVariables
from utils.logging_utils import custom_dimensions, get_logger
from utils.exceptions import (
swallow_all_exceptions,
UnSupportedModelTypeError,
MissingModelNameError,
ModelImportErrorStrings
)


Expand Down Expand Up @@ -139,10 +137,17 @@ def main():

# validations
if model_type not in SUPPORTED_MODEL_ASSET_TYPES:
raise AzureMLException._with_error(AzureMLError.create(UnSupportedModelTypeError, model_type=model_type))
message = ModelImportErrorStrings.UNSUPPORTED_MODEL_TYPE_ERROR.format(model_type=model_type)
raise ValidationException(
message=message, no_personal_data_message=message, target=ErrorTarget.MODEL
)

if not model_name:
raise AzureMLException._with_error(AzureMLError.create(MissingModelNameError))
message = ModelImportErrorStrings.MISSING_MODEL_NAME_ERROR
raise ValidationException(
message=message, no_personal_data_message=message,
target=ErrorTarget.MODEL, error_category=ErrorCategory.USER_ERROR
)

if not re.match(VALID_MODEL_NAME_PATTERN, model_name):
# update model name to one supported for registration
Expand Down
38 changes: 27 additions & 11 deletions assets/common/src/run_mlflow_model_local_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,15 @@
import argparse
import os
import shutil
from azureml._common._error_definition import AzureMLError
from azureml._common.exceptions import AzureMLException
from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException
from pathlib import Path

from utils.config import AppName
from utils.exceptions import swallow_all_exceptions
from utils.logging_utils import custom_dimensions, get_logger
from utils.common_utils import run_command
from utils.exceptions import (
CondaEnvCreationError,
CondaFileMissingError,
MlflowModelValidationError,
swallow_all_exceptions,
ModelImportErrorStrings,
)


Expand Down Expand Up @@ -58,7 +55,11 @@ def run():

conda_file_path = os.path.join(model_dir, CONDA_FILE_NAME)
if not os.path.exists(conda_file_path):
raise AzureMLException._with_error(AzureMLError.create(CondaFileMissingError))
message = ModelImportErrorStrings.CONDA_FILE_MISSING_ERROR
raise MlException(
message=message, no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.MODEL
)

# copy conda.yaml to cwd
shutil.copy(conda_file_path, CONDA_FILE_NAME)
Expand All @@ -68,12 +69,22 @@ def run():
exit_code, stdout = run_command(CREATE_CONDA_CMD.format(ENV_PREFIX, CONDA_FILE_NAME))
if exit_code != 0:
logger.warning(f"Error in creating conda env. Error details {stdout}")
raise AzureMLException._with_error(AzureMLError.create(CondaEnvCreationError))
message = ModelImportErrorStrings.CONDA_ENV_CREATION_ERROR
raise MlException(
message=message, no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENVIRONMENT,
error=stdout
)

exit_code, stdout = run_command(CONDA_LIST.format(ENV_PREFIX))
if exit_code != 0:
logger.warning(f"Error in listing env at {ENV_PREFIX}. Error details {stdout}")
raise AzureMLException._with_error(AzureMLError.create(CondaEnvCreationError))
message = "Error in listing env at {ENV_PREFIX}. Error details {stdout}"
logger.warning(message.format(ENV_PREFIX=ENV_PREFIX, stdout=stdout))
raise MlException(
message=message, no_personal_data_message=message.format(ENV_PREFIX=ENV_PREFIX, stdout=stdout),
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENVIRONMENT,
error=stdout
)
logger.info(f"pip list: \n{stdout}")

cmd = f"python {SCRIPT_PATH} --model-path {model_dir}"
Expand All @@ -97,7 +108,12 @@ def run():

if exit_code != 0:
logger.warning(f"Local validation failed. Error {stdout}")
raise AzureMLException._with_error(AzureMLError.create(MlflowModelValidationError))
message = ModelImportErrorStrings.MLFLOW_LOCAL_VALIDATION_ERROR
raise MlException(
message=message, no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.ENVIRONMENT,
error=stdout
)

# copy the model to output dir
shutil.copytree(src=model_dir, dst=output_model_path, dirs_exist_ok=True)
Expand Down
50 changes: 35 additions & 15 deletions assets/common/src/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,14 @@
from azure.ai.ml import MLClient
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
from azure.identity import ManagedIdentityCredential
from azureml._common._error_definition import AzureMLError
from azureml._common.exceptions import AzureMLException
from azureml.core.run import Run
from azure.ai.ml.exceptions import ErrorTarget, ErrorCategory, MlException, ValidationException
from pathlib import Path
from subprocess import PIPE, run, STDOUT
from typing import Tuple

from utils.logging_utils import get_logger
from utils.run_utils import JobRunDetails
from utils.exceptions import UserIdentityMissingError, InvalidModelIDError
from logging_utils import get_logger
from run_utils import JobRunDetails
from exceptions import ModelImportErrorStrings


logger = get_logger(__name__)
Expand Down Expand Up @@ -59,16 +57,30 @@ def get_mlclient(registry_name: str = None):
# Check if given credential can get token successfully.
credential.get_token("https://management.azure.com/.default")
except Exception as ex:
raise AzureMLException._with_error(AzureMLError.create(UserIdentityMissingError, exception=ex))
message = ModelImportErrorStrings.USER_IDENTITY_MISSING_ERROR
raise MlException(
message=message.format(ex=ex), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.IDENTITY,
error=ex
)
try:
subscription_id = os.environ['AZUREML_ARM_SUBSCRIPTION']
resource_group = os.environ["AZUREML_ARM_RESOURCEGROUP"]
workspace = os.environ["AZUREML_ARM_WORKSPACE_NAME"]
except Exception as ex:
message = "Failed to get AzureML ARM env variable : {ex}"
raise MlException(
message=message.format(ex=ex), no_personal_data_message=message,
error_category=ErrorCategory.SYSTEM_ERROR, target=ErrorTarget.COMPONENT,
error=ex
)

if registry_name is None:
run = Run.get_context(allow_offline=False)
ws = run.experiment.workspace
return MLClient(
credential=credential,
subscription_id=ws._subscription_id,
resource_group_name=ws._resource_group,
workspace_name=ws._workspace_name,
subscription_id=subscription_id,
resource_group_name=resource_group,
workspace_name=workspace,
)
logger.info(f"Creating MLClient with registry name {registry_name}")
return MLClient(credential=credential, registry_name=registry_name)
Expand All @@ -81,7 +93,11 @@ def get_model_name(model_id: str):
if match:
return match.group(2) or match.group(5)
else:
raise AzureMLException._with_error(AzureMLError.create(InvalidModelIDError, model_id=model_id))
message = ModelImportErrorStrings.INVALID_MODEL_ID_ERROR
raise ValidationException(
message=message.format(model_id=model_id), no_personal_data_message=message,
error_category=ErrorCategory.USER_ERROR, target=ErrorTarget.COMPONENT
)


def get_model_name_version(model_id: str):
Expand All @@ -100,8 +116,12 @@ def get_model_name_version(model_id: str):
logger.info(f"ws asset URI, returning {match.group(1)}, {match.group(2)}")
return match.group(2) or match.group(5)

logger.info(f"Unsupported model asset uri: {model_id}")
raise AzureMLException._with_error(AzureMLError.create(InvalidModelIDError, model_id=model_id))
message = "Unsupported model asset uri: {model_id}"
logger.info(message.format(model_id=model_id))
raise MlException(
message=message.format(model_id=model_id), no_personal_data_message=message,
error_category=ErrorCategory.USER_ERROR, target=ErrorTarget.COMPONENT
)


def get_job_uri_from_input_run_assetId(assetID: str):
Expand Down
Loading
Loading