diff --git a/src/workload-orchestration/azext_workload_orchestration/_help.py b/src/workload-orchestration/azext_workload_orchestration/_help.py index 1f93a9946f0..dbfc3d8192f 100644 --- a/src/workload-orchestration/azext_workload_orchestration/_help.py +++ b/src/workload-orchestration/azext_workload_orchestration/_help.py @@ -46,3 +46,52 @@ - name: Use a specific kubeconfig and context text: az workload-orchestration support create-bundle --kube-config ~/.kube/prod-config --kube-context my-cluster """ + +helps['workload-orchestration cluster init'] = """ +type: command +short-summary: Prepare an Arc-connected Kubernetes cluster for Workload Orchestration. +long-summary: | + Installs all prerequisites on an Arc-connected cluster to make it ready for + Workload Orchestration. This is an idempotent operation that skips components + already installed. + + Steps performed: + 1. Verify cluster is Arc-connected with required features enabled + 2. Install cert-manager (if not present) + 3. Install trust-manager (if not present) + 4. Install WO extension (if not present) + 5. Create custom location (if not present) + + After running this command, use the output custom location ID with + 'az workload-orchestration target create --extended-location'. 
+examples: + - name: Initialize a cluster with defaults + text: az workload-orchestration cluster init -c my-cluster -g my-rg -l eastus2euap + - name: Initialize with a specific release train + text: az workload-orchestration cluster init -c my-cluster -g my-rg -l eastus2euap --release-train dev + - name: Pin a specific extension version + text: az workload-orchestration cluster init -c my-cluster -g my-rg -l eastus2euap --extension-version 2.1.28 + - name: Custom location name + text: az workload-orchestration cluster init -c my-cluster -g my-rg -l eastus2euap --custom-location-name my-cl +""" + +helps['workload-orchestration hierarchy create'] = """ +type: command +short-summary: Create a hierarchy (Site + Configuration + ConfigurationReference) in one command. +long-summary: | + Creates the full resource stack for a hierarchy level: + 1. Site (with level label) + 2. Configuration (in specified region) + 3. ConfigurationReference (links site to configuration) + + Supports two types: + - ResourceGroup (default): single site in a resource group + - ServiceGroup: nested sites under a service group (up to 3 levels) +examples: + - name: Create RG hierarchy from YAML file + text: az workload-orchestration hierarchy create -g my-rg --configuration-location eastus2euap --hierarchy-spec "@hierarchy.yaml" + - name: Create RG hierarchy with shorthand + text: az workload-orchestration hierarchy create -g my-rg --configuration-location eastus2euap --hierarchy-spec "name=Mehoopany level=factory" + - name: Create ServiceGroup hierarchy from YAML + text: az workload-orchestration hierarchy create --configuration-location eastus2euap --hierarchy-spec "@sg-hierarchy.yaml" +""" diff --git a/src/workload-orchestration/azext_workload_orchestration/_params.py b/src/workload-orchestration/azext_workload_orchestration/_params.py index b17dd8cccd7..33fe188aa8b 100644 --- a/src/workload-orchestration/azext_workload_orchestration/_params.py +++ 
b/src/workload-orchestration/azext_workload_orchestration/_params.py @@ -65,3 +65,65 @@ def load_arguments(self, _): # pylint: disable=unused-argument options_list=['--kube-context'], help='Kubernetes context to use. Defaults to current context.', ) + c.argument( + 'skip_site_reference', + options_list=['--skip-site-reference'], + action='store_true', + help='Skip auto-creation of site-reference to context.', + ) + + with self.argument_context('workload-orchestration cluster init') as c: + c.argument('cluster_name', options_list=['--cluster-name', '-c'], + help='Name of the Arc-connected Kubernetes cluster.', required=True) + c.argument('resource_group', options_list=['--resource-group', '-g'], + help='Resource group of the Arc-connected cluster.', required=True) + c.argument('location', options_list=['--location', '-l'], + help='Azure region for the custom location (e.g., eastus2euap).', required=True) + c.argument('release_train', options_list=['--release-train'], + help='Extension release train. Default: stable.') + c.argument('extension_version', options_list=['--extension-version'], + help='Specific WO extension version to install.') + c.argument('extension_name', options_list=['--extension-name'], + help='Name for the WO extension resource. Default: wo-extension.') + c.argument('custom_location_name', options_list=['--custom-location-name'], + help='Name for the custom location. 
Default: `-cl`.') + + with self.argument_context('workload-orchestration hierarchy create') as c: + c.argument('resource_group', options_list=['--resource-group', '-g'], + help='Resource group for Configuration resources.', required=True) + c.argument('configuration_location', options_list=['--configuration-location'], + help='Azure region for the Configuration resource (e.g., eastus2euap).', required=True) + c.argument('hierarchy_spec', options_list=['--hierarchy-spec'], + help='Hierarchy specification as YAML/JSON file (@file.yaml) or shorthand syntax.', + required=True, type=_parse_hierarchy_spec) + + +def _parse_hierarchy_spec(value): + """Parse hierarchy spec from file path or shorthand syntax.""" + import os + + # Handle @file syntax (@ may be stripped by CLI framework) + filepath = value.lstrip('@') + if os.path.exists(filepath): + try: + import yaml + except ImportError: + import json as yaml_fallback + with open(filepath, 'r', encoding='utf-8') as f: + return yaml_fallback.load(f) + with open(filepath, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + + # Shorthand: name=X level=Y type=Z + result = {} + for pair in value.split(): + if '=' in pair: + k, v = pair.split('=', 1) + result[k] = v + if not result: + from azure.cli.core.azclierror import ValidationError + raise ValidationError( + f"Invalid hierarchy-spec: '{value}'. 
" + "Use a YAML file path or shorthand: name=X level=Y" + ) + return result diff --git a/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/context/_create.py b/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/context/_create.py index 6292ad711b4..957d55b7ac7 100644 --- a/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/context/_create.py +++ b/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/context/_create.py @@ -9,6 +9,7 @@ # flake8: noqa from azure.cli.core.aaz import * +from azure.cli.core.azclierror import CLIInternalError as CLIError @register_command( @@ -128,6 +129,14 @@ def _build_arguments_schema(cls, *args, **kwargs): tags = cls._args_schema.tags tags.Element = AAZStrArg() + + # Custom arg: --site-id (not sent to ARM, used in post_operations) + _args_schema.site_id = AAZStrArg( + options=["--site-id"], + arg_group="Onboarding", + help="ARM resource ID of a Site to auto-create a site reference after context creation.", + ) + return cls._args_schema def _execute_operations(self): @@ -141,7 +150,47 @@ def pre_operations(self): @register_callback def post_operations(self): - pass + if hasattr(self.ctx.args, 'site_id') and self.ctx.args.site_id: + self._create_site_reference() + + def _create_site_reference(self): + """Auto-create a site reference linking the site to this context.""" + import logging + import re + logger = logging.getLogger(__name__) + + site_id = str(self.ctx.args.site_id) + context_name = str(self.ctx.args.context_name) + rg = str(self.ctx.args.resource_group) + + # Extract site name from ARM ID for the reference name + site_name = site_id.rstrip("/").split("/")[-1] + ref_name = f"{site_name}-ref" + # Sanitize: only alphanumeric and hyphens, 3-61 chars + ref_name = re.sub(r'[^a-zA-Z0-9-]', '-', ref_name)[:61] + + logger.info("Creating site reference '%s' -> %s", ref_name, site_id) 
+ + try: + from azext_workload_orchestration.onboarding.utils import invoke_cli_command, CmdProxy + cmd_proxy = CmdProxy(self.ctx.cli_ctx) + invoke_cli_command(cmd_proxy, [ + "workload-orchestration", "context", "site-reference", "create", + "-g", rg, + "--context-name", context_name, + "--site-reference-name", ref_name, + "--site-id", site_id, + ]) + logger.info("Site reference '%s' created successfully", ref_name) + except Exception as exc: + logger.warning("Site reference creation failed: %s", exc) + raise CLIError( + f"Context created successfully, but site reference creation failed: {exc}\n" + f"Run manually:\n" + f" az workload-orchestration context site-reference create " + f"-g {rg} --context-name {context_name} " + f"--site-reference-name {ref_name} --site-id {site_id}" + ) def _output(self, *args, **kwargs): result = self.deserialize_output(self.ctx.vars.instance, client_flatten=True) diff --git a/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_create.py b/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_create.py index 7308557c30a..a68e1ec1b3f 100644 --- a/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_create.py +++ b/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_create.py @@ -15,6 +15,7 @@ logger = logging.getLogger(__name__) + @register_command( "workload-orchestration target create", ) @@ -117,10 +118,16 @@ def _build_arguments_schema(cls, *args, **kwargs): options=["--target-specification"], arg_group="Properties", help="Specifies that we are using Helm charts for the k8s deployment", - required=True, ) + # Onboarding simplification arguments + _args_schema.service_group = AAZStrArg( + options=["--service-group"], + arg_group="Onboarding", + help="ServiceGroup name to auto-link this target to after creation.", + ) + capabilities = 
cls._args_schema.capabilities capabilities.Element = AAZStrArg() @@ -170,30 +177,63 @@ def _execute_operations(self): @register_callback def pre_operations(self): - # If context_id is not provided, try to get it from config + # Resolve context_id from CLI config if not provided if not self.ctx.args.context_id: - try: - # Attempt to retrieve the context_id from the config file - context_id = self.ctx.cli_ctx.config.get('workload_orchestration', 'context_id') - if context_id: - self.ctx.args.context_id = context_id - else: - # This else block handles the case where the section exists, but the key is empty - raise CLIInternalError( - "No context-id was provided, and no default context is set. " - "Please provide the --context-id argument or set a default context using 'az workload-orchestration context use'." - ) - except configparser.NoSectionError as e: - logger.debug("Config section 'workload_orchestration' not found: %s", e) - # This is the fix: catch the specific error when the [workload_orchestration] section is missing + self._resolve_context_id_from_config() + + def _resolve_context_id_from_config(self): + """Resolve context_id from CLI config if not already set.""" + try: + context_id = self.ctx.cli_ctx.config.get('workload_orchestration', 'context_id') + if context_id: + self.ctx.args.context_id = context_id + else: raise CLIInternalError( "No context-id was provided, and no default context is set. " - "Please provide the --context-id argument or set a default context using 'az workload-orchestration context use'." + "Please provide the --context-id argument " + "or set a default context using 'az workload-orchestration context use'." ) + except configparser.NoSectionError as e: + logger.debug("Config section 'workload_orchestration' not found: %s", e) + raise CLIInternalError( + "No context-id was provided, and no default context is set. 
" + "Please provide the --context-id argument " + "or set a default context using 'az workload-orchestration context use'." + ) @register_callback def post_operations(self): - pass + # --service-group: auto-link target to SG after creation + if hasattr(self.ctx.args, 'service_group') and self.ctx.args.service_group: + self._handle_service_group_link() + + def _handle_service_group_link(self): + """Link the created target to a service group.""" + from azext_workload_orchestration.onboarding.target_sg_link import ( + link_target_to_service_group + ) + from azext_workload_orchestration.onboarding.utils import CmdProxy + sg_name = str(self.ctx.args.service_group) + # Get target ID from the response + target_id = None + if hasattr(self.ctx.vars, 'instance') and self.ctx.vars.instance: + target_id = self.ctx.vars.instance.get("id") + + if not target_id: + # Construct it + sub_id = self.ctx.subscription_id + rg = str(self.ctx.args.resource_group) + name = str(self.ctx.args.target_name) + target_id = f"/subscriptions/{sub_id}/resourceGroups/{rg}/providers/Microsoft.Edge/targets/{name}" + + print(f"[service-group] Linking target to '{sg_name}'...") + try: + cmd_proxy = CmdProxy(self.ctx.cli_ctx) + link_target_to_service_group(cmd_proxy, target_id, sg_name) + print(f"[service-group] Linked [OK]") + except Exception as exc: + logger.warning("Service group link failed (non-critical): %s", exc) + print(f"[service-group] Link failed (non-critical): {exc}") def _output(self, *args, **kwargs): result = self.deserialize_output(self.ctx.vars.instance, client_flatten=True) diff --git a/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_install.py b/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_install.py index 6a1a0e35238..365bf0cc1d6 100644 --- a/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_install.py +++ 
b/src/workload-orchestration/azext_workload_orchestration/aaz/latest/workload_orchestration/target/_install.py @@ -9,15 +9,26 @@ # flake8: noqa from azure.cli.core.aaz import * +from azure.cli.core.azclierror import CLIInternalError, ValidationError @register_command( "workload-orchestration target install", ) class Install(AAZCommand): - """Post request to install a solution - :example: Install a solution to a target - az workload-orchestration target install -g rg1 -n target1 --solution-version-id /subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myRG/providers/Microsoft.Edge/solutionVersions/mySolutionVersion + """Install a solution on a target. + + When invoked with --solution-template-version-id (or --solution-template-name + --solution-template-version), + runs the full deployment chain: config-set (optional) → review → publish → install. + + When invoked with --solution-version-id only (old flow), runs direct install. + + :example: Full deploy (friendly name) + az workload-orchestration target install -g rg1 -n target1 --solution-template-name tmpl --solution-template-version 1.0.0 + :example: Full deploy with config + az workload-orchestration target install -g rg1 -n target1 --solution-template-name tmpl --stv 1.0.0 --config values.yaml --config-template-rg rg1 --config-template-name tmpl --ct-version 1.0.0 + :example: Direct install (old flow) + az workload-orchestration target install -g rg1 -n target1 --solution-version-id /subscriptions/.../solutionVersions/sv1 """ _aaz_info = { @@ -41,8 +52,6 @@ def _build_arguments_schema(cls, *args, **kwargs): return cls._args_schema cls._args_schema = super()._build_arguments_schema(*args, **kwargs) - # define Arg Group "" - _args_schema = cls._args_schema _args_schema.resource_group = AAZResourceGroupNameArg( required=True, @@ -59,31 +68,47 @@ def _build_arguments_schema(cls, *args, **kwargs): ), ) - # define Arg Group "Body" - _args_schema = cls._args_schema - - # Remove these parameters 
(v2025_06_01) - # _args_schema.solution = AAZStrArg( - # options=["--solution"], - # arg_group="Body", - # help="Solution Name", - # required=True, - # ) - # _args_schema.solution_version = AAZStrArg( - # options=["--solution-version"], - # arg_group="Body", - # help="Solution Version Name", - # required=True, - # ) - - # Add new parameter (v2025_06_01) + # Old flow: direct install with solution-version-id _args_schema.solution_version_id = AAZStrArg( options=["--solution-version-id"], arg_group="Body", - help="Solution Version ARM Id", - required=True, + help="Solution Version ARM ID (direct install, skips review/publish).", + ) + + # New flow: full deploy chain + _args_schema.solution_template_version_id = AAZStrArg( + options=["--solution-template-version-id"], + arg_group="Deploy", + help="Full ARM ID of the solution template version. Triggers full deploy chain.", + ) + _args_schema.solution_template_name = AAZStrArg( + options=["--solution-template-name"], + arg_group="Deploy", + help="Name of the solution template. 
Use with --solution-template-version.", + ) + _args_schema.solution_template_version = AAZStrArg( + options=["--solution-template-version"], + arg_group="Deploy", + help="Version of the solution template (e.g., 1.0.0).", + ) + _args_schema.solution_instance_name = AAZStrArg( + options=["--solution-instance-name"], + arg_group="Deploy", + help="Custom solution instance name for the review step.", + ) + _args_schema.solution_dependencies = AAZStrArg( + options=["--solution-dependencies"], + arg_group="Deploy", + help="JSON string of solution dependency definitions.", + ) + + # Config set args + _args_schema.config = AAZStrArg( + options=["--config", "--configuration"], + arg_group="Config", + help="Path to YAML/JSON config file to set before review.", ) - + return cls._args_schema def _execute_operations(self): @@ -93,7 +118,54 @@ def _execute_operations(self): @register_callback def pre_operations(self): - pass + """If template args provided, run config-set → review → publish before install.""" + args = self.ctx.args + has_template = ( + args.solution_template_version_id + or args.solution_template_name + ) + has_direct = args.solution_version_id + + # Validate: need either template args OR solution-version-id + if not has_template and not has_direct: + raise ValidationError( + "Provide either --solution-template-version-id (or --solution-template-name + " + "--solution-template-version) for full deploy, or --solution-version-id for direct install." + ) + + if has_template and has_direct: + raise ValidationError( + "Provide either solution template args (for full deploy) or " + "--solution-version-id (for direct install), not both." 
+ ) + + if has_template: + self._run_deploy_chain() + + def _run_deploy_chain(self): + """Run config-set → review → publish, then let the AAZ install handle the final step.""" + from azext_workload_orchestration.onboarding.target_deploy import ( + target_deploy_pre_install, + ) + from azext_workload_orchestration.onboarding.utils import CmdProxy + + args = self.ctx.args + cmd_proxy = CmdProxy(self.ctx.cli_ctx) + + sv_id = target_deploy_pre_install( + cmd=cmd_proxy, + resource_group=str(args.resource_group), + target_name=str(args.target_name), + solution_template_version_id=str(args.solution_template_version_id) if args.solution_template_version_id else None, + solution_template_name=str(args.solution_template_name) if args.solution_template_name else None, + solution_template_version=str(args.solution_template_version) if args.solution_template_version else None, + solution_instance_name=str(args.solution_instance_name) if args.solution_instance_name else None, + solution_dependencies=str(args.solution_dependencies) if args.solution_dependencies else None, + config=str(args.config) if args.config else None, + ) + + # Set the solution_version_id for the AAZ install step + args.solution_version_id = sv_id @register_callback def post_operations(self): diff --git a/src/workload-orchestration/azext_workload_orchestration/commands.py b/src/workload-orchestration/azext_workload_orchestration/commands.py index 1f1d9c002a7..7c1fa55ced1 100644 --- a/src/workload-orchestration/azext_workload_orchestration/commands.py +++ b/src/workload-orchestration/azext_workload_orchestration/commands.py @@ -10,5 +10,11 @@ def load_command_table(self, _): # pylint: disable=unused-argument - with self.command_group('workload-orchestration support', is_preview=True) as g: + with self.command_group('workload-orchestration support') as g: g.custom_command('create-bundle', 'create_support_bundle') + + with self.command_group('workload-orchestration cluster') as g: + g.custom_command('init', 
'target_init') + + with self.command_group('workload-orchestration hierarchy') as g: + g.custom_command('create', 'hierarchy_create') diff --git a/src/workload-orchestration/azext_workload_orchestration/custom.py b/src/workload-orchestration/azext_workload_orchestration/custom.py index 849a65ef9de..94e8a97cbbd 100644 --- a/src/workload-orchestration/azext_workload_orchestration/custom.py +++ b/src/workload-orchestration/azext_workload_orchestration/custom.py @@ -7,3 +7,7 @@ # Support bundle command from azext_workload_orchestration.support import create_support_bundle # pylint: disable=unused-import # noqa: F401 + +# Onboarding simplification commands +from azext_workload_orchestration.onboarding import target_init # pylint: disable=unused-import # noqa: F401 +from azext_workload_orchestration.onboarding import hierarchy_create # pylint: disable=unused-import # noqa: F401 diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/__init__.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/__init__.py new file mode 100644 index 00000000000..2bdcc95085d --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/__init__.py @@ -0,0 +1,86 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Onboarding simplification commands for Workload Orchestration. + +Provides convenience CLI commands that wrap multiple API calls +into single-command operations to reduce onboarding steps. 
+""" + +from azext_workload_orchestration.onboarding.target_prepare import target_prepare +from azext_workload_orchestration.onboarding.target_deploy import target_deploy as _target_deploy +from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create as _hierarchy_create + + +def target_init( + cmd, + cluster_name, + resource_group, + location, + release_train=None, + extension_version=None, + extension_name=None, + custom_location_name=None, +): + """Prepare an Arc-connected cluster for Workload Orchestration.""" + result = target_prepare( + cmd=cmd, + cluster_name=cluster_name, + resource_group=resource_group, + location=location, + extension_name=extension_name, + custom_location_name=custom_location_name, + extension_version=extension_version, + release_train=release_train, + ) + return result + + +def target_deploy( + cmd, + resource_group, + target_name, + solution_template_version_id=None, + solution_template_name=None, + solution_template_version=None, + solution_template_rg=None, + solution_instance_name=None, + solution_dependencies=None, + config=None, + config_hierarchy_id=None, + config_template_rg=None, + config_template_name=None, + config_template_version=None, +): + """Deploy a solution to a target: review → publish → install.""" + return _target_deploy( + cmd=cmd, + resource_group=resource_group, + target_name=target_name, + solution_template_version_id=solution_template_version_id, + solution_template_name=solution_template_name, + solution_template_version=solution_template_version, + solution_template_rg=solution_template_rg, + solution_instance_name=solution_instance_name, + solution_dependencies=solution_dependencies, + config=config, + config_hierarchy_id=config_hierarchy_id, + config_template_rg=config_template_rg, + config_template_name=config_template_name, + config_template_version=config_template_version, + ) + + +__all__ = ['target_prepare', 'target_init', 'target_deploy', 'hierarchy_create'] + + +def 
hierarchy_create(cmd, resource_group=None, configuration_location=None, hierarchy_spec=None): + """Create a hierarchy: Site + Configuration + ConfigurationReference.""" + return _hierarchy_create( + cmd=cmd, + resource_group=resource_group, + configuration_location=configuration_location, + hierarchy_spec=hierarchy_spec, + ) diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/consts.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/consts.py new file mode 100644 index 00000000000..e683127701f --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/consts.py @@ -0,0 +1,82 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Constants for onboarding simplification commands.""" + +# pylint: disable=line-too-long + +# --------------------------------------------------------------------------- +# API Versions +# --------------------------------------------------------------------------- +SERVICE_GROUP_API_VERSION = "2024-02-01-preview" +SITE_API_VERSION = "2025-06-01" +CONFIGURATION_API_VERSION = "2025-08-01" +CONFIG_REF_API_VERSION = "2025-08-01" +TARGET_API_VERSION = "2025-08-01" +SG_MEMBER_API_VERSION = "2023-09-01-preview" +CONTEXT_API_VERSION = "2025-08-01" + +# --------------------------------------------------------------------------- +# ARM Endpoints +# --------------------------------------------------------------------------- +ARM_ENDPOINT = "https://management.azure.com" +ARM_RESOURCE = "https://management.azure.com" + +# --------------------------------------------------------------------------- +# Resource Providers +# 
--------------------------------------------------------------------------- +EDGE_RP_NAMESPACE = "Microsoft.Edge" +SERVICE_GROUP_RP = "Microsoft.Management" +RELATIONSHIPS_RP = "Microsoft.Relationships" + +# --------------------------------------------------------------------------- +# cert-manager Defaults +# --------------------------------------------------------------------------- +DEFAULT_CERT_MANAGER_VERSION = "v1.15.3" +CERT_MANAGER_MANIFEST_URL = ( + "https://github.com/cert-manager/cert-manager/releases/download" + "/{version}/cert-manager.yaml" +) +CERT_MANAGER_NAMESPACE = "cert-manager" +CERT_MANAGER_WEBHOOK_DEPLOYMENT = "cert-manager-webhook" +CERT_MANAGER_MIN_PODS = 3 # webhook, controller, cainjector + +# --------------------------------------------------------------------------- +# trust-manager Defaults +# --------------------------------------------------------------------------- +TRUST_MANAGER_DEPLOYMENT = "trust-manager" +TRUST_MANAGER_HELM_REPO = "https://charts.jetstack.io" +TRUST_MANAGER_HELM_REPO_NAME = "jetstack" +TRUST_MANAGER_HELM_CHART = "jetstack/trust-manager" + +# --------------------------------------------------------------------------- +# WO Extension Defaults +# --------------------------------------------------------------------------- +DEFAULT_EXTENSION_TYPE = "Microsoft.workloadorchestration" +DEFAULT_EXTENSION_NAME = "wo-extension" +DEFAULT_RELEASE_TRAIN = "stable" +DEFAULT_EXTENSION_NAMESPACE = "workloadorchestration" +DEFAULT_EXTENSION_SCOPE = "cluster" + +# --------------------------------------------------------------------------- +# Limits & Timeouts +# --------------------------------------------------------------------------- +MAX_HIERARCHY_NAME_LENGTH = 24 # Configuration resource name limit +LRO_TIMEOUT_SECONDS = 600 # 10 minutes per LRO step +LRO_DEFAULT_POLL_INTERVAL = 15 # seconds, overridden by Retry-After header +CERT_MANAGER_WAIT_TIMEOUT = "300s" + +# 
--------------------------------------------------------------------------- +# Default Target Specification (helm.v3) +# --------------------------------------------------------------------------- +DEFAULT_TARGET_SPECIFICATION = { + "topologies": [{ + "bindings": [{ + "role": "helm.v3", + "provider": "providers.target.helm", + "config": {"inCluster": "true"} + }] + }] +} diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/context_init.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/context_init.py new file mode 100644 index 00000000000..9572851df46 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/context_init.py @@ -0,0 +1,283 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Context initialization for onboarding simplification. + +Finds or creates a WO context, sets it as current, and ensures the required +capabilities and hierarchy levels are present. 
+ +Usage (called by target create --init-context): + context_id = handle_init_context(cli_ctx, ctx_name, rg, location, + hierarchy_level, capabilities) +""" + +# pylint: disable=broad-exception-caught + +import json +import logging + +from azure.cli.core.azclierror import CLIInternalError + +from azext_workload_orchestration.onboarding.consts import ( + ARM_ENDPOINT, + CONTEXT_API_VERSION, +) +from azext_workload_orchestration.onboarding.utils import ( + CmdProxy, + invoke_cli_command, + invoke_silent, + parse_arm_id, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Public entry point +# --------------------------------------------------------------------------- + +def handle_init_context(cli_ctx, ctx_name, resource_group, location, + hierarchy_level, capabilities): + """Find or create a WO context and return its ARM resource ID. + + Strategy (in order): + 1. Check if a context is already set in CLI config → use it + 2. List contexts in the target's resource group → use first match + 3. Create a new context with the given name + 4. If create fails (e.g. name conflict), search subscription-wide + + After resolving the context, ensures the required hierarchy level and + capabilities are present (adds them if missing). + + Returns: + str: The ARM resource ID of the context. + + Raises: + CLIInternalError: If no context can be found or created. + """ + import configparser + + cmd = CmdProxy(cli_ctx) + + # ------------------------------------------------------------------ + # 1. 
Check CLI config for an already-set context + # ------------------------------------------------------------------ + try: + existing_ctx_id = cli_ctx.config.get('workload_orchestration', 'context_id') + if existing_ctx_id: + logger.info("Context already set in config: %s", existing_ctx_id) + _ensure_capabilities(cli_ctx, existing_ctx_id, hierarchy_level, capabilities) + print("[init-context] Using existing context [OK]") + return existing_ctx_id + except (configparser.NoSectionError, configparser.NoOptionError): + pass + + # ------------------------------------------------------------------ + # 2. List contexts in this resource group + # ------------------------------------------------------------------ + try: + existing = invoke_cli_command(cmd, [ + "workload-orchestration", "context", "list", "-g", resource_group + ]) + if existing and isinstance(existing, list) and len(existing) > 0: + ctx_id = existing[0].get("id", "") + if ctx_id: + parts = parse_arm_id(ctx_id) + found_name = parts.get("contexts", ctx_name) + found_rg = parts.get("resourcegroups", resource_group) + _set_current(found_name, found_rg) + _ensure_capabilities(cli_ctx, ctx_id, hierarchy_level, capabilities) + print(f"[init-context] Using existing context '{found_name}' [OK]") + return ctx_id + except Exception: + pass # No contexts found — proceed to create + + # ------------------------------------------------------------------ + # 3. 
Create a new context + # ------------------------------------------------------------------ + print(f"[init-context] Creating context '{ctx_name}'...") + + create_args = _build_create_args(ctx_name, resource_group, location, + hierarchy_level, capabilities) + exit_code = invoke_silent(create_args) + + if exit_code == 0: + _set_current(ctx_name, resource_group) + + # Read back the context ID from config (set by 'context use') + try: + ctx_id = cli_ctx.config.get('workload_orchestration', 'context_id') + if ctx_id: + print(f"[init-context] Context '{ctx_name}' created [OK]") + return ctx_id + except (configparser.NoSectionError, configparser.NoOptionError): + pass + + # Fallback: construct the ID manually + sub_id = cli_ctx.data.get('subscription_id', '') + ctx_id = (f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" + f"/providers/Microsoft.Edge/contexts/{ctx_name}") + print(f"[init-context] Context '{ctx_name}' created [OK]") + return ctx_id + + # ------------------------------------------------------------------ + # 4. Create failed — search subscription-wide + # ------------------------------------------------------------------ + logger.warning("Context create returned exit %d. Searching subscription...", exit_code) + ctx_id = _search_subscription(cli_ctx) + if ctx_id: + parts = parse_arm_id(ctx_id) + found_name = parts.get("contexts", "unknown") + found_rg = parts.get("resourcegroups", resource_group) + _set_current(found_name, found_rg) + _ensure_capabilities(cli_ctx, ctx_id, hierarchy_level, capabilities) + print(f"[init-context] Using existing context '{found_name}' in RG '{found_rg}' [OK]") + return ctx_id + + raise CLIInternalError( + "Could not create or find an existing context. " + "Please provide --context-id explicitly." 
+ ) + + +# --------------------------------------------------------------------------- +# Private helpers +# --------------------------------------------------------------------------- + +def _build_create_args(ctx_name, resource_group, location, + hierarchy_level, capabilities): + """Build the arg list for 'az workload-orchestration context create'.""" + # Capabilities: [0].name=X [0].description=X [1].name=Y ... + cap_args = [] + for i, cap in enumerate(capabilities or []): + cap_args.extend([f"[{i}].name={cap}", f"[{i}].description={cap}"]) + + hier_args = [f"[0].name={hierarchy_level}", f"[0].description={hierarchy_level}"] + + args = [ + "workload-orchestration", "context", "create", + "-g", resource_group, "-l", location, "--name", ctx_name, + "--hierarchies", + ] + hier_args + + if cap_args: + args.append("--capabilities") + args.extend(cap_args) + + args.extend(["-o", "none"]) + return args + + +def _set_current(ctx_name, ctx_rg): + """Set a context as the CLI default (silently).""" + invoke_silent([ + "workload-orchestration", "context", "use", + "--name", ctx_name, "-g", ctx_rg, "-o", "none", + ]) + + +def _search_subscription(cli_ctx): + """Search the entire subscription for any existing context. 
Returns ID or None.""" + from azure.cli.core.util import send_raw_request + + sub_id = cli_ctx.data.get('subscription_id', '') + try: + resp = send_raw_request( + cli_ctx, + method="GET", + url=(f"{ARM_ENDPOINT}/subscriptions/{sub_id}" + f"/providers/Microsoft.Edge/contexts" + f"?api-version={CONTEXT_API_VERSION}"), + resource=ARM_ENDPOINT, + ) + if resp.status_code == 200: + contexts = resp.json().get("value", []) + if contexts: + return contexts[0].get("id") + except Exception as exc: + logger.warning("Subscription-wide context search failed: %s", exc) + return None + + +def _ensure_capabilities(cli_ctx, ctx_id, hierarchy_level, capabilities): + """Add missing capabilities/hierarchies to an existing context via PUT.""" + if not capabilities: + return + + cmd = CmdProxy(cli_ctx) + parts = parse_arm_id(ctx_id) + ctx_rg = parts.get("resourcegroups") + ctx_name = parts.get("contexts") + sub_id = parts.get("subscriptions") + + if not ctx_rg or not ctx_name: + return + + # Get current context state + try: + ctx_data = invoke_cli_command(cmd, [ + "workload-orchestration", "context", "show", + "-g", ctx_rg, "--name", ctx_name, + ]) + except Exception: + return + + if not ctx_data or not isinstance(ctx_data, dict): + return + + props = ctx_data.get("properties", {}) + existing_caps = {c.get("name", "") for c in (props.get("capabilities") or [])} + existing_hiers = {h.get("name", "") for h in (props.get("hierarchies") or [])} + + missing_caps = [c for c in capabilities if c not in existing_caps] + missing_hier = hierarchy_level not in existing_hiers + + if not missing_caps and not missing_hier: + return # Nothing to add + + # Merge existing + new + all_caps = list(props.get("capabilities") or []) + for cap in missing_caps: + all_caps.append({"name": cap, "description": cap}) + + all_hiers = list(props.get("hierarchies") or []) + if missing_hier: + all_hiers.append({"name": hierarchy_level, "description": hierarchy_level}) + + print(f"[init-context] Adding capabilities 
{missing_caps} to context...") + + # PUT updated context + from azure.cli.core.util import send_raw_request + + if not sub_id: + sub_id = cli_ctx.data.get('subscription_id', '') + + location = ctx_data.get("location", "") + body = { + "location": location, + "properties": { + "capabilities": [{"name": c.get("name", ""), "description": c.get("description", "")} + for c in all_caps], + "hierarchies": [{"name": h.get("name", ""), "description": h.get("description", "")} + for h in all_hiers], + } + } + + try: + resp = send_raw_request( + cli_ctx, + method="PUT", + url=(f"{ARM_ENDPOINT}/subscriptions/{sub_id}" + f"/resourceGroups/{ctx_rg}/providers/Microsoft.Edge" + f"/contexts/{ctx_name}?api-version={CONTEXT_API_VERSION}"), + body=json.dumps(body), + resource=ARM_ENDPOINT, + ) + if resp.status_code in (200, 201): + print("[init-context] Capabilities updated [OK]") + else: + logger.warning("Context update returned %d: %s", resp.status_code, resp.text) + except Exception as exc: + logger.warning("Failed to update context capabilities: %s", exc) diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/hierarchy_create.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/hierarchy_create.py new file mode 100644 index 00000000000..c9b1dd5eba9 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/hierarchy_create.py @@ -0,0 +1,379 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Hierarchy create command - creates Site + Configuration + ConfigurationReference. 
+ +Supports two hierarchy types: + - ResourceGroup: Single site in a resource group (no children) + - ServiceGroup: Nested sites under a service group (up to 3 levels) + +For ResourceGroup: + az workload-orchestration hierarchy create \\ + --resource-group rg --configuration-location eastus2euap \\ + --hierarchy-spec "@hierarchy.yaml" + + hierarchy.yaml: + name: Mehoopany + level: factory + +For ServiceGroup: + az workload-orchestration hierarchy create \\ + --configuration-location eastus2euap \\ + --hierarchy-spec "@hierarchy.yaml" + + hierarchy.yaml: + type: ServiceGroup + name: India + level: country + children: + name: Karnataka + level: region + children: + - name: BangaloreSouth + level: factory +""" + +# pylint: disable=broad-exception-caught +# pylint: disable=too-many-locals + +import json +import logging + +from azure.cli.core.azclierror import ( + CLIInternalError, + ValidationError, +) +from azure.cli.core.util import send_raw_request + +from azext_workload_orchestration.onboarding.consts import ( + ARM_ENDPOINT, + SERVICE_GROUP_API_VERSION, + SITE_API_VERSION, + CONFIGURATION_API_VERSION, + CONFIG_REF_API_VERSION, + EDGE_RP_NAMESPACE, +) + +logger = logging.getLogger(__name__) + +MAX_SG_DEPTH = 3 + + +def hierarchy_create(cmd, resource_group=None, configuration_location=None, hierarchy_spec=None): + """Create a hierarchy: Site + Configuration + ConfigurationReference. + + Parses the hierarchy spec (YAML/JSON or shorthand) and creates + the full resource stack. 
+ """ + if not hierarchy_spec: + raise ValidationError("--hierarchy-spec is required.") + if not configuration_location: + raise ValidationError("--configuration-location is required.") + if not resource_group: + raise ValidationError("--resource-group is required (used for Configuration resources).") + + # Parse spec (could be dict from shorthand or file) + spec = hierarchy_spec if isinstance(hierarchy_spec, dict) else hierarchy_spec + + name = spec.get("name") + level = spec.get("level") + hierarchy_type = spec.get("type", "ResourceGroup") + + if not name: + raise ValidationError("hierarchy-spec must include 'name'.") + if not level: + raise ValidationError("hierarchy-spec must include 'level'.") + + if hierarchy_type == "ServiceGroup": + return _create_sg_hierarchy(cmd, spec, configuration_location, resource_group) + else: + return _create_rg_hierarchy(cmd, resource_group, configuration_location, name, level) + + +# --------------------------------------------------------------------------- +# ResourceGroup hierarchy +# --------------------------------------------------------------------------- + +def _create_rg_hierarchy(cmd, resource_group, config_location, name, level): + """Create Site + Configuration + ConfigurationReference in a resource group.""" + sub_id = _get_sub_id(cmd) + total = 3 + step = [0] + + def _log(msg, status=""): + if status: + print(f"[{step[0]}/{total}] {msg}... 
{status}") + else: + step[0] += 1 + print(f"[{step[0]}/{total}] {msg}...") + + site_id = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}/providers/{EDGE_RP_NAMESPACE}/sites/{name}" + config_name = f"{name}Config" + config_id = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}/providers/{EDGE_RP_NAMESPACE}/configurations/{config_name}" + + print(f"\nCreating hierarchy '{name}' (level: {level}) in RG '{resource_group}'...\n") + + # Step 1: Create Site + _log(f"Site '{name}' ({level})") + _arm_put(cmd, f"{ARM_ENDPOINT}{site_id}", { + "properties": { + "displayName": name, + "description": name, + "labels": {"level": level}, + } + }, SITE_API_VERSION) + _log(f"Site '{name}'", "[OK]") + + # Step 2: Create Configuration + _log(f"Configuration '{config_name}'") + _arm_put(cmd, f"{ARM_ENDPOINT}{config_id}", { + "location": config_location, + }, CONFIGURATION_API_VERSION) + _log(f"Configuration '{config_name}'", "[OK]") + + # Step 3: Create ConfigurationReference (links site → config) + config_ref_url = f"{ARM_ENDPOINT}{site_id}/providers/{EDGE_RP_NAMESPACE}/configurationReferences/default" + _log("ConfigurationReference") + _arm_put(cmd, config_ref_url, { + "properties": { + "configurationResourceId": config_id, + } + }, CONFIG_REF_API_VERSION) + _log("ConfigurationReference", "[OK]") + + print(f"\nHierarchy '{name}' created successfully (3 resources).\n") + + return { + "type": "ResourceGroup", + "name": name, + "level": level, + "resourceGroup": resource_group, + "siteId": site_id, + "configurationId": config_id, + } + + +# --------------------------------------------------------------------------- +# ServiceGroup hierarchy (recursive, max 3 levels) +# --------------------------------------------------------------------------- + +def _create_sg_hierarchy(cmd, spec, config_location, resource_group): + """Create ServiceGroup + nested Sites + Configurations recursively.""" + sub_id = _get_sub_id(cmd) + tenant_id = _get_tenant_id(cmd) + + # Count total 
nodes + nodes = _count_nodes(spec) + if nodes > MAX_SG_DEPTH: + raise ValidationError( + f"ServiceGroup hierarchy has {nodes} levels. Maximum is {MAX_SG_DEPTH}." + ) + + print(f"\nCreating ServiceGroup hierarchy '{spec['name']}' ({nodes} levels)...\n") + + results = [] + _create_sg_level(cmd, spec, config_location, sub_id, tenant_id, + resource_group, parent_sg=None, results=results, depth=0) + + print(f"\nHierarchy created successfully ({nodes} levels, {len(results)} resources).\n") + + return { + "type": "ServiceGroup", + "name": spec["name"], + "levels": nodes, + "resources": results, + } + + +def _create_sg_level(cmd, node, config_location, sub_id, tenant_id, resource_group, parent_sg, results, depth): + """Recursively create SG + Site + Config + ConfigRef at each level.""" + import time + + name = node["name"] + level = node["level"] + + # Create or reference the ServiceGroup + if parent_sg: + parent_id = f"/providers/Microsoft.Management/serviceGroups/{parent_sg}" + else: + parent_id = f"/providers/Microsoft.Management/serviceGroups/{tenant_id}" + + sg_id = f"/providers/Microsoft.Management/serviceGroups/{name}" + indent = " " * depth + + # 1. Create ServiceGroup + print(f"{indent}[+] ServiceGroup '{name}'...") + try: + _arm_put(cmd, f"{ARM_ENDPOINT}{sg_id}", { + "properties": { + "displayName": name, + "parent": {"resourceId": parent_id}, + } + }, SERVICE_GROUP_API_VERSION) + print(f"{indent}[+] ServiceGroup '{name}'... [OK]") + results.append({"type": "ServiceGroup", "name": name, "id": sg_id}) + except Exception as exc: + logger.warning("ServiceGroup creation failed: %s", exc) + raise CLIInternalError(f"ServiceGroup '{name}' creation failed: {exc}") + + # Wait for RBAC propagation on new SG scope + print(f"{indent} Waiting for RBAC propagation...") + _wait_for_sg_rbac(cmd, config_location, sg_id, name) + + # 2. 
Create Site under ServiceGroup (regional endpoint) + site_id = f"{sg_id}/providers/{EDGE_RP_NAMESPACE}/sites/{name}" + print(f"{indent} [+] Site '{name}' ({level})...") + _arm_put_regional(cmd, config_location, site_id, { + "properties": { + "displayName": name, + "description": name, + "labels": {"level": level}, + } + }, SITE_API_VERSION) + print(f"{indent} [+] Site '{name}'... [OK]") + results.append({"type": "Site", "name": name, "level": level, "id": site_id}) + + # 3. Create Configuration (RG-scoped, NOT under SG) + config_name = f"{name}Config" + config_id = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}/providers/{EDGE_RP_NAMESPACE}/configurations/{config_name}" + print(f"{indent} [+] Configuration '{config_name}' (in RG: {resource_group})...") + _arm_put(cmd, f"{ARM_ENDPOINT}{config_id}", { + "location": config_location, + }, CONFIGURATION_API_VERSION) + print(f"{indent} [+] Configuration '{config_name}'... [OK]") + results.append({"type": "Configuration", "name": config_name, "id": config_id}) + + # 4. Create ConfigurationReference on Site (regional, links site -> RG config) + config_ref_id = f"{site_id}/providers/{EDGE_RP_NAMESPACE}/configurationReferences/default" + print(f"{indent} [+] ConfigurationReference...") + _arm_put_regional(cmd, config_location, config_ref_id, { + "properties": { + "configurationResourceId": config_id, + } + }, CONFIG_REF_API_VERSION) + print(f"{indent} [+] ConfigurationReference... 
[OK]") + results.append({"type": "ConfigurationReference", "siteId": site_id}) + + # Recurse into children + children = node.get("children") + if children: + if isinstance(children, dict): + children = [children] + for child in children: + _create_sg_level(cmd, child, config_location, sub_id, tenant_id, + resource_group, parent_sg=name, results=results, depth=depth + 1) + + +def _count_nodes(node): + """Count total depth of hierarchy tree.""" + children = node.get("children") + if not children: + return 1 + if isinstance(children, dict): + return 1 + _count_nodes(children) + return 1 + max(_count_nodes(c) for c in children) + + +# --------------------------------------------------------------------------- +# ARM helpers +# --------------------------------------------------------------------------- + +def _arm_put(cmd, url, body, api_version): + """PUT to ARM endpoint.""" + full_url = f"{url}?api-version={api_version}" + send_raw_request( + cmd.cli_ctx, "PUT", full_url, + body=json.dumps(body), + headers=["Content-Type=application/json"], + resource=ARM_ENDPOINT, + ) + + +def _arm_put_regional(cmd, location, resource_id, body, api_version): + """PUT to regional ARM endpoint (for SG-scoped resources).""" + full_url = f"https://{location}.management.azure.com{resource_id}?api-version={api_version}" + body_str = json.dumps(body) + + token_type, token = _get_token(cmd) + + send_raw_request( + cmd.cli_ctx, "PUT", full_url, + body=body_str, + headers=[ + f"Authorization={token_type} {token}", + "Content-Type=application/json", + ], + skip_authorization_header=True, + ) + + +def _arm_get_regional(cmd, location, resource_id, api_version): + """GET from regional ARM endpoint.""" + full_url = f"https://{location}.management.azure.com{resource_id}?api-version={api_version}" + + token_type, token = _get_token(cmd) + + resp = send_raw_request( + cmd.cli_ctx, "GET", full_url, + headers=[ + f"Authorization={token_type} {token}", + ], + skip_authorization_header=True, + ) + return 
resp + + +def _wait_for_sg_rbac(cmd, location, sg_id, sg_name, max_retries=18, wait_sec=10): + """Wait for RBAC to propagate on a newly created ServiceGroup. + + After SG creation, it takes time for permissions to propagate. + We poll by trying to list sites under the SG until it succeeds. + """ + import time + + site_list_id = f"{sg_id}/providers/{EDGE_RP_NAMESPACE}/sites" + + for attempt in range(max_retries): + try: + _arm_get_regional(cmd, location, site_list_id, SITE_API_VERSION) + logger.info("RBAC propagated for SG '%s' after %ds", sg_name, attempt * wait_sec) + return + except Exception: + if attempt < max_retries - 1: + logger.debug("RBAC not ready (attempt %d/%d), waiting %ds...", attempt + 1, max_retries, wait_sec) + time.sleep(wait_sec) + else: + logger.warning( + "RBAC propagation timeout for SG '%s' after %ds. Continuing anyway...", + sg_name, max_retries * wait_sec + ) + + +def _get_token(cmd): + """Get ARM bearer token.""" + from azure.cli.core._profile import Profile + profile = Profile(cli_ctx=cmd.cli_ctx) + token_info, _, _ = profile.get_raw_token( + resource="https://management.azure.com", + subscription=profile.get_subscription_id() + ) + return token_info[0], token_info[1] # token_type, token + + +def _get_sub_id(cmd): + """Get subscription ID.""" + sub_id = cmd.cli_ctx.data.get('subscription_id') + if not sub_id: + from azure.cli.core._profile import Profile + sub_id = Profile(cli_ctx=cmd.cli_ctx).get_subscription_id() + return sub_id + + +def _get_tenant_id(cmd): + """Get tenant ID.""" + from azure.cli.core._profile import Profile + profile = Profile(cli_ctx=cmd.cli_ctx) + _, _, tenant_id = profile.get_raw_token(resource="https://management.azure.com") + return tenant_id diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/hierarchy_init.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/hierarchy_init.py new file mode 100644 index 00000000000..4248e6141a0 --- /dev/null +++ 
b/src/workload-orchestration/azext_workload_orchestration/onboarding/hierarchy_init.py @@ -0,0 +1,161 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Lightweight hierarchy initialization for target create --init-hierarchy. + +Creates a simple site + configuration + config-reference + site-reference +in a resource group scope (no service group). This is the "RG-scoped" +hierarchy used when a user just wants a quick site without the full +hierarchy_create flow (which requires a Service Group parent). + +Usage (called by target create --init-hierarchy): + handle_init_hierarchy(cli_ctx, site_name, resource_group, location, + hierarchy_level, context_id) +""" + +# pylint: disable=broad-exception-caught + +import json +import logging + +from azure.cli.core.util import send_raw_request + +from azext_workload_orchestration.onboarding.consts import ( + ARM_ENDPOINT, + SITE_API_VERSION, + CONFIGURATION_API_VERSION, + CONFIG_REF_API_VERSION, +) +from azext_workload_orchestration.onboarding.utils import ( + invoke_silent, + parse_arm_id, +) + +logger = logging.getLogger(__name__) + + +def handle_init_hierarchy(cli_ctx, site_name, resource_group, location, + hierarchy_level, context_id=None): + """Create a minimal RG-scoped hierarchy: Site → Configuration → ConfigRef → SiteRef. + + Steps: + 1. PUT site at regional endpoint + 2. PUT configuration at regional endpoint + 3. PUT configuration-reference (links config → site) + 4. Create site-reference via CLI (links site → context) + + All PUTs are idempotent — safe to re-run. 
+ + Args: + cli_ctx: Azure CLI context (from self.ctx.cli_ctx) + site_name: Name for the new site + resource_group: Target resource group + location: Azure region (e.g., eastus2euap) + hierarchy_level: Level label (e.g., "line", "factory") + context_id: Optional ARM ID of the context to link to + """ + # Get subscription ID — prefer extracting from context_id, fall back to CLI profile + if context_id: + parts = parse_arm_id(context_id) + sub_id = parts.get("subscriptions", "") + else: + sub_id = "" + if not sub_id: + from azure.cli.core._profile import Profile + sub_id = Profile(cli_ctx=cli_ctx).get_subscription_id() + regional_base = f"https://{location}.management.azure.com" + + site_id = (f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" + f"/providers/Microsoft.Edge/sites/{site_name}") + config_id = (f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" + f"/providers/Microsoft.Edge/configurations/{site_name}") + + print(f"[init-hierarchy] Creating site '{site_name}'...") + + # Step 1: Create Site (regional endpoint) + _put_resource( + cli_ctx, + url=f"{regional_base}{site_id}?api-version={SITE_API_VERSION}", + body={ + "properties": { + "displayName": site_name, + "description": site_name, + "labels": {"level": hierarchy_level or "line"}, + } + }, + label="Site", + ) + + # Step 2: Create Configuration (regional endpoint) + _put_resource( + cli_ctx, + url=f"{regional_base}{config_id}?api-version={CONFIGURATION_API_VERSION}", + body={"location": location}, + label="Configuration", + ) + + # Step 3: Create Configuration Reference (links config → site) + config_ref_url = ( + f"{ARM_ENDPOINT}{site_id}" + f"/providers/Microsoft.Edge/configurationreferences/default" + f"?api-version={CONFIG_REF_API_VERSION}" + ) + _put_resource( + cli_ctx, + url=config_ref_url, + body={"properties": {"configurationResourceId": config_id}}, + label="Configuration Reference", + ) + + # Step 4: Create Site Reference (links site → context) + if context_id: + 
_create_site_reference(context_id, site_name, site_id) + + print(f"[init-hierarchy] Site '{site_name}' + config + references created [OK]") + + +# --------------------------------------------------------------------------- +# Private helpers +# --------------------------------------------------------------------------- + +def _put_resource(cli_ctx, url, body, label): + """PUT a resource via send_raw_request. Logs on failure but doesn't crash.""" + try: + resp = send_raw_request( + cli_ctx, + method="PUT", + url=url, + body=json.dumps(body), + resource=ARM_ENDPOINT, + headers=["Content-Type=application/json"], + ) + if resp.status_code in (200, 201): + logger.info("%s created/updated successfully", label) + else: + logger.warning("%s PUT returned %d: %s", label, resp.status_code, resp.text) + except Exception as exc: + logger.warning("%s creation failed: %s", label, exc) + raise + + +def _create_site_reference(context_id, site_name, site_id): + """Create a site-reference linking the site to the context.""" + parts = parse_arm_id(context_id) + ctx_rg = parts.get("resourcegroups", "") + ctx_name = parts.get("contexts", "default") + + if not ctx_rg: + return + + try: + invoke_silent([ + "workload-orchestration", "context", "site-reference", "create", + "-g", ctx_rg, "--context-name", ctx_name, + "--name", f"{site_name}-ref", + "--site-id", site_id, + "-o", "none", + ]) + except Exception: + pass # Site reference may already exist diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/target_deploy.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/target_deploy.py new file mode 100644 index 00000000000..2a620c7f216 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/target_deploy.py @@ -0,0 +1,466 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Target deploy command - chains review -> publish -> install in one step. + +Replaces 3 manual commands: + 1. az workload-orchestration target review + 2. az workload-orchestration target publish + 3. az workload-orchestration target install + +Optionally prepends config-set (step 0) when --config is provided. + +Usage: + # Friendly name + az workload-orchestration target deploy \\ + -g my-rg -n my-target \\ + --solution-template-name tmpl --solution-template-version 1.0.0 + + # ARM ID + az workload-orchestration target deploy \\ + -g my-rg -n my-target \\ + --solution-template-version-id + + # With config + az workload-orchestration target deploy \\ + -g my-rg -n my-target \\ + --solution-template-version-id \\ + --config values.yaml \\ + --config-template-rg rg --config-template-name tmpl --config-template-version 1.0.0 +""" + +import json +import logging + +from azure.cli.core.azclierror import CLIInternalError, ValidationError +from azure.cli.core.util import send_raw_request + +logger = logging.getLogger(__name__) + +API_VERSION = "2025-08-01" +ARM_RESOURCE = "https://management.azure.com" + + +def target_deploy( + cmd, + resource_group, + target_name, + solution_template_version_id=None, + solution_template_name=None, + solution_template_version=None, + solution_instance_name=None, + solution_dependencies=None, + config=None, + config_hierarchy_id=None, + config_template_rg=None, + config_template_name=None, + config_template_version=None, +): + """Deploy a solution to a target: config-set → review → publish → install. + + Standalone deploy function (used internally). 
+ """ + sub_id = _get_subscription_id(cmd) + + # --- Resolve solution-template-version-id --- + solution_template_version_id = _resolve_template_version_id( + solution_template_version_id, solution_template_name, + solution_template_version, None, + resource_group, sub_id, + ) + + base_url = ( + f"{ARM_RESOURCE}/subscriptions/{sub_id}" + f"/resourceGroups/{resource_group}" + f"/providers/Microsoft.Edge/targets/{target_name}" + ) + + # Figure out which steps to run + do_config = config is not None + + total = sum([do_config, True, True, True]) # config(opt) + review + publish + install + current = [0] # mutable counter + + def _log(step_name, status=""): + if status: + print(f"[{current[0]}/{total}] {step_name}... {status}") + else: + current[0] += 1 + print(f"[{current[0]}/{total}] {step_name}...") + + results = {} + sv_id = None + + # --- Step 0: Config set --- + if do_config: + _log("Config Set") + _handle_config_set( + cmd, config, config_hierarchy_id, config_template_rg, + config_template_name, config_template_version, + resource_group, target_name, sub_id, + ) + _log("Config Set", "[OK]") + results["configSet"] = "Succeeded" + + # --- Step 1: Review --- + _log("Review") + review_result = _do_review( + cmd, base_url, solution_template_version_id, + solution_instance_name, solution_dependencies, + ) + results["review"] = review_result + sv_id = _extract_solution_version_id(review_result) + _log("Review", f"[OK] -> solutionVersionId: {_short_id(sv_id)}") + + # --- Step 2: Publish --- + _log("Publish") + publish_result = _do_publish(cmd, base_url, sv_id) + results["publish"] = publish_result + _log("Publish", "[OK]") + + # --- Step 3: Install --- + _log("Install") + install_result = _do_install(cmd, base_url, sv_id) + results["install"] = install_result + _log("Install", "[OK]") + + print(f"\n{'=' * 50}") + print(f"Deployment complete for target '{target_name}'") + print(f"Solution Version ID: {sv_id}") + print(f"{'=' * 50}") + + # Return the install LRO result 
(same format as `az wo target install`) + return results.get("install", { + "status": "Succeeded", + "resourceId": f"{base_url}", + }) + + +def target_deploy_pre_install( + cmd, + resource_group, + target_name, + solution_template_version_id=None, + solution_template_name=None, + solution_template_version=None, + solution_instance_name=None, + solution_dependencies=None, + config=None, +): + """Run config-set → review → publish and return the solution-version-id. + + Called by the enhanced `target install` command before the AAZ install step. + Does NOT run install — that's handled by the AAZ LRO. + + When using friendly name, the target's resource_group is used for the ST. + Config-template args are auto-derived from solution template args. + """ + sub_id = _get_subscription_id(cmd) + + solution_template_version_id = _resolve_template_version_id( + solution_template_version_id, solution_template_name, + solution_template_version, None, + resource_group, sub_id, + ) + + base_url = ( + f"{ARM_RESOURCE}/subscriptions/{sub_id}" + f"/resourceGroups/{resource_group}" + f"/providers/Microsoft.Edge/targets/{target_name}" + ) + + do_config = config is not None + total = sum([do_config, True, True, True]) # config + review + publish + install(AAZ) + current = [0] + + def _log(step_name, status=""): + if status: + print(f"[{current[0]}/{total}] {step_name}... 
{status}") + else: + current[0] += 1 + print(f"[{current[0]}/{total}] {step_name}...") + + # --- Step 0: Config set --- + if do_config: + _log("Config Set") + # Auto-derive config template args from solution template args + ct_rg = resource_group + ct_name = solution_template_name + ct_version = solution_template_version + + # If using ARM ID, extract name/version/rg from it + if not ct_name and solution_template_version_id: + parts = solution_template_version_id.strip("/").split("/") + # .../resourceGroups/{rg}/providers/Microsoft.Edge/solutionTemplates/{name}/versions/{ver} + for i, part in enumerate(parts): + if part.lower() == "resourcegroups" and i + 1 < len(parts): + ct_rg = parts[i + 1] + elif part.lower() == "solutiontemplates" and i + 1 < len(parts): + ct_name = parts[i + 1] + elif part.lower() == "versions" and i + 1 < len(parts): + ct_version = parts[i + 1] + + _handle_config_set( + cmd, config, None, ct_rg, + ct_name, ct_version, + resource_group, target_name, sub_id, + ) + _log("Config Set", "[OK]") + + # --- Step 1: Review --- + _log("Review") + review_result = _do_review( + cmd, base_url, solution_template_version_id, + solution_instance_name, solution_dependencies, + ) + sv_id = _extract_solution_version_id(review_result) + _log("Review", f"[OK] -> solutionVersionId: {_short_id(sv_id)}") + + # --- Step 2: Publish --- + _log("Publish") + _do_publish(cmd, base_url, sv_id) + _log("Publish", "[OK]") + + # Step 3 (Install) is handled by AAZ LRO + _log("Install") + + return sv_id + + +# --------------------------------------------------------------------------- +# Resolution helpers +# --------------------------------------------------------------------------- + +def _get_subscription_id(cmd): + """Get subscription ID from CLI context.""" + sub_id = cmd.cli_ctx.data.get('subscription_id') + if not sub_id: + from azure.cli.core._profile import Profile + sub_id = Profile(cli_ctx=cmd.cli_ctx).get_subscription_id() + return sub_id + + +def 
_resolve_template_version_id( + arm_id, template_name, template_version, _unused, + default_rg, sub_id, +): + """Resolve solution-template-version-id from friendly name or ARM ID. + + Mutual exclusivity: + - Provide --solution-template-version-id (full ARM ID) + - OR --solution-template-name + --solution-template-version (friendly) + + When using friendly name, the target's resource group is used. + """ + if arm_id and template_name: + raise ValidationError( + "Provide either --solution-template-version-id OR " + "(--solution-template-name + --solution-template-version), not both." + ) + + if arm_id: + return arm_id + + if template_name: + if not template_version: + raise ValidationError( + "--solution-template-version is required when using --solution-template-name." + ) + return ( + f"/subscriptions/{sub_id}/resourceGroups/{default_rg}" + f"/providers/Microsoft.Edge/solutionTemplates/{template_name}" + f"/versions/{template_version}" + ) + + raise ValidationError( + "Provide either --solution-template-version-id or " + "(--solution-template-name + --solution-template-version)." 
+ ) + + +# --------------------------------------------------------------------------- +# Step implementations +# --------------------------------------------------------------------------- + +def _do_review(cmd, base_url, solution_template_version_id, + solution_instance_name=None, solution_dependencies=None): + """POST .../reviewSolutionVersion""" + url = f"{base_url}/reviewSolutionVersion?api-version={API_VERSION}" + body = { + "solutionTemplateVersionId": solution_template_version_id, + } + if solution_instance_name: + body["solutionInstanceName"] = solution_instance_name + if solution_dependencies: + body["solutionDependencies"] = ( + json.loads(solution_dependencies) + if isinstance(solution_dependencies, str) + else solution_dependencies + ) + + resp = send_raw_request( + cmd.cli_ctx, "POST", url, + body=json.dumps(body), + headers=["Content-Type=application/json"], + resource=ARM_RESOURCE, + ) + return _parse_response(resp, "Review", cmd=cmd) + + +def _do_publish(cmd, base_url, solution_version_id): + """POST .../publishSolutionVersion""" + url = f"{base_url}/publishSolutionVersion?api-version={API_VERSION}" + body = {"solutionVersionId": solution_version_id} + + resp = send_raw_request( + cmd.cli_ctx, "POST", url, + body=json.dumps(body), + headers=["Content-Type=application/json"], + resource=ARM_RESOURCE, + ) + return _parse_response(resp, "Publish", cmd=cmd) + + +def _do_install(cmd, base_url, solution_version_id): + """POST .../installSolution""" + url = f"{base_url}/installSolution?api-version={API_VERSION}" + body = {"solutionVersionId": solution_version_id} + + resp = send_raw_request( + cmd.cli_ctx, "POST", url, + body=json.dumps(body), + headers=["Content-Type=application/json"], + resource=ARM_RESOURCE, + ) + + return _parse_response(resp, "Install", cmd=cmd) + + +def _handle_config_set( + cmd, config_file, hierarchy_id, template_rg, + template_name, template_version, + resource_group, target_name, sub_id, +): + """Set configuration values from 
file before review. + + Delegates to: az workload-orchestration configuration set + """ + if not hierarchy_id: + hierarchy_id = ( + f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" + f"/providers/Microsoft.Edge/targets/{target_name}" + ) + + if not template_rg or not template_name or not template_version: + raise ValidationError( + "When using --config, you must also provide " + "--config-template-rg, --config-template-name, and --config-template-version." + ) + + from azext_workload_orchestration.onboarding.utils import invoke_cli_command + invoke_cli_command(cmd, [ + "workload-orchestration", "configuration", "set", + "--hierarchy-id", hierarchy_id, + "--template-rg", template_rg, + "--template-name", template_name, + "--version", template_version, + "--file", config_file, + "--solution", + ], expect_json=False) + + +# --------------------------------------------------------------------------- +# LRO and response helpers +# --------------------------------------------------------------------------- + +def _parse_response(resp, step_name, cmd=None): + """Parse REST response, handling 200/201/202 LRO patterns.""" + status = resp.status_code + if status in (200, 201): + try: + return resp.json() + except Exception: + return {"status": "Succeeded"} + if status == 202: + return _poll_lro(resp, step_name, cmd=cmd) + + # Error + try: + error_body = resp.text + except Exception: + error_body = f"HTTP {status}" + raise CLIInternalError(f"{step_name} failed (HTTP {status}): {error_body}") + + +def _poll_lro(resp, step_name, cmd=None): + """Poll an LRO via Location or Azure-AsyncOperation header.""" + import time + + location = resp.headers.get("Location") or resp.headers.get("Azure-AsyncOperation") + if not location: + logger.warning("No LRO polling URL in %s response headers", step_name) + return {"status": "Accepted"} + + retry_after = int(resp.headers.get("Retry-After", "10")) + max_polls = 60 # ~10 min max + + for i in range(max_polls): + 
time.sleep(retry_after) + try: + poll_resp = send_raw_request(cmd.cli_ctx, "GET", location, resource=ARM_RESOURCE) + except Exception: + logger.debug("LRO poll attempt %d failed for %s", i + 1, step_name) + continue + + try: + body = poll_resp.json() + except Exception: + continue + + poll_status = body.get("status", "").lower() + if poll_status in ("succeeded", "completed"): + return body + if poll_status in ("failed", "canceled", "cancelled"): + raise CLIInternalError( + f"{step_name} LRO failed: {json.dumps(body, indent=2)}" + ) + + raise CLIInternalError(f"{step_name} LRO timed out after {max_polls * retry_after}s") + + +def _extract_solution_version_id(review_result): + """Extract solution-version-id from review response.""" + if not review_result or not isinstance(review_result, dict): + raise CLIInternalError("Review returned no result - cannot determine solution version ID.") + + # The LRO response structure: + # {id, name, status, properties: {id: , properties: {...}, ...}} + # The solution version ARM ID is at properties.id (NOT properties.properties.id) + props = review_result.get("properties", {}) + + sv_id = ( + props.get("id")  # properties.id (most common) + or review_result.get("solutionVersionId")  # top-level fallback + or props.get("solutionVersionId")  # properties.solutionVersionId + or (props.get("properties", {}) or {}).get("id")  # properties.properties.id + ) + if not sv_id: + logger.warning("Could not extract solutionVersionId. Keys at top: %s, inner keys: %s, full (truncated): %s", + list(review_result.keys()), + list(props.get("properties").keys()) if isinstance(props.get("properties"), dict) else "N/A", + json.dumps(review_result, indent=2)[:800]) + raise CLIInternalError( + "Review succeeded but no solutionVersionId found in response. " + "Use --resume-from publish --solution-version-id to continue manually." 
+ ) + return sv_id + + +def _short_id(arm_id): + """Return the last segment of an ARM ID for display.""" + if not arm_id: + return "" + parts = arm_id.strip("/").split("/") + return parts[-1] if parts else arm_id diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/target_prepare.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/target_prepare.py new file mode 100644 index 00000000000..7f3204a64d9 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/target_prepare.py @@ -0,0 +1,707 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Target prepare command - prepares an Arc-connected K8s cluster for WO. + +Installs cert-manager, trust-manager, WO extension, and creates a custom +location. Idempotent - skips components that already exist. 
+ +Usage: + az workload-orchestration target prepare \\ + --cluster-name my-cluster -g my-rg -l eastus +""" + +# pylint: disable=broad-exception-caught +# pylint: disable=too-many-locals +# pylint: disable=too-many-statements +# pylint: disable=too-many-branches +# pylint: disable=import-outside-toplevel + +import json +import os +import subprocess +import logging + +from azure.cli.core.azclierror import ( + CLIInternalError, + ValidationError, +) + +from azext_workload_orchestration.onboarding.consts import ( + DEFAULT_CERT_MANAGER_VERSION, + CERT_MANAGER_MANIFEST_URL, + CERT_MANAGER_NAMESPACE, + CERT_MANAGER_WEBHOOK_DEPLOYMENT, + CERT_MANAGER_MIN_PODS, + CERT_MANAGER_WAIT_TIMEOUT, + TRUST_MANAGER_DEPLOYMENT, + TRUST_MANAGER_HELM_REPO, + TRUST_MANAGER_HELM_REPO_NAME, + TRUST_MANAGER_HELM_CHART, + DEFAULT_EXTENSION_TYPE, + DEFAULT_EXTENSION_NAME, + DEFAULT_RELEASE_TRAIN, + DEFAULT_EXTENSION_NAMESPACE, + DEFAULT_EXTENSION_SCOPE, +) +from azext_workload_orchestration.onboarding.utils import ( + invoke_cli_command, + print_step, + print_success, + print_detail, +) + +from azure.cli.core.util import send_raw_request + +logger = logging.getLogger(__name__) + +TOTAL_STEPS = 4 + + +def target_prepare( + cmd, + cluster_name, + resource_group, + location, + extension_name=None, + custom_location_name=None, + extension_version=None, + release_train=None, + cert_manager_version=None, + skip_cert_manager=False, + skip_trust_manager=False, + kube_config=None, + kube_context=None, + no_wait=False, +): + """Prepare an Arc-connected K8s cluster for Workload Orchestration. + + Installs cert-manager, trust-manager, WO extension, and creates a custom + location. Skips components that are already installed (idempotent). 
+ """ + extension_name = extension_name or DEFAULT_EXTENSION_NAME + custom_location_name = custom_location_name or f"{cluster_name}-cl" + release_train = release_train or DEFAULT_RELEASE_TRAIN + cert_manager_version = cert_manager_version or DEFAULT_CERT_MANAGER_VERSION + + print(f"\nPreparing cluster '{cluster_name}' for Workload Orchestration...\n") + + # Track step results for diagnostic summary + step_results = {} + + # Pre-flight: verify cluster is Arc-connected and features enabled + try: + connected_cluster_id = _preflight_checks(cmd, cluster_name, resource_group) + step_results["preflight"] = "Passed" + except Exception as exc: + step_results["preflight"] = f"FAILED: {exc}" + _print_diagnostic_summary(step_results, cluster_name, resource_group) + raise + + # Step 1: cert-manager + try: + if skip_cert_manager: + print_step(1, TOTAL_STEPS, "cert-manager", "Skipped (--skip-cert-manager)") + step_results["cert-manager"] = "Skipped" + else: + _ensure_cert_manager(cert_manager_version, kube_config, kube_context) + step_results["cert-manager"] = "Succeeded" + except Exception as exc: + step_results["cert-manager"] = f"FAILED: {exc}" + logger.error("Step 1/4 failed (cert-manager): %s", exc) + _print_diagnostic_summary(step_results, cluster_name, resource_group) + raise CLIInternalError( + f"cert-manager installation failed: {exc}", + recommendation=( + "Check cluster connectivity and kubectl access. " + "Verify the cluster has internet access to github.com. 
" + "Try manually: kubectl apply -f https://github.com/cert-manager/" + f"cert-manager/releases/download/{cert_manager_version}/cert-manager.yaml" + ) + ) + + # Step 2: trust-manager + try: + if skip_trust_manager: + print_step(2, TOTAL_STEPS, "trust-manager", "Skipped (--skip-trust-manager)") + step_results["trust-manager"] = "Skipped" + else: + _ensure_trust_manager(kube_config, kube_context) + step_results["trust-manager"] = "Succeeded" + except CLIInternalError: + raise # Already has good error message (e.g., helm not installed) + except Exception as exc: + step_results["trust-manager"] = f"FAILED: {exc}" + logger.error("Step 2/4 failed (trust-manager): %s", exc) + _print_diagnostic_summary(step_results, cluster_name, resource_group) + raise CLIInternalError( + f"trust-manager installation failed: {exc}", + recommendation=( + "Ensure helm is installed and the cluster can reach charts.jetstack.io. " + "Try manually: helm upgrade trust-manager jetstack/trust-manager " + "--install --namespace cert-manager --wait" + ) + ) + + # Step 3: WO extension + try: + extension_id = _ensure_wo_extension( + cmd, cluster_name, resource_group, extension_name, + extension_version, release_train, no_wait, + kube_config, kube_context + ) + step_results["wo-extension"] = "Succeeded" + except Exception as exc: + step_results["wo-extension"] = f"FAILED: {exc}" + logger.error("Step 3/4 failed (WO extension): %s", exc) + _print_diagnostic_summary(step_results, cluster_name, resource_group) + raise CLIInternalError( + f"WO extension installation failed: {exc}", + recommendation=( + "Common causes:\n" + " - Wrong release train for this region (try --release-train preview or dev)\n" + " - Insufficient cluster resources (need 2+ CPU cores, 4Gi+ memory)\n" + " - Storage class not available (check: kubectl get sc)\n" + "Try manually: az k8s-extension create -g {rg} --cluster-name {cluster} " + "--cluster-type connectedClusters --name {ext} " + "--extension-type 
Microsoft.workloadorchestration --scope cluster " + f"--release-train {release_train}" + ).format(rg=resource_group, cluster=cluster_name, ext=extension_name) + ) + + # Step 4: Custom location + try: + cl_id = _ensure_custom_location( + cmd, cluster_name, resource_group, location, + custom_location_name, extension_id, connected_cluster_id + ) + step_results["custom-location"] = "Succeeded" + except Exception as exc: + step_results["custom-location"] = f"FAILED: {exc}" + logger.error("Step 4/4 failed (Custom location): %s", exc) + _print_diagnostic_summary(step_results, cluster_name, resource_group) + raise CLIInternalError( + f"Custom location creation failed: {exc}", + recommendation=( + "Ensure custom-locations feature is enabled:\n" + f" az connectedk8s enable-features -n {cluster_name} " + f"-g {resource_group} --features cluster-connect custom-locations\n" + "Also verify the extension is in 'Succeeded' state:\n" + f" az k8s-extension show -g {resource_group} " + f"--cluster-name {cluster_name} --cluster-type connectedClusters " + f"--name {extension_name}" + ) + ) + + # Output extended-location.json + extended_location = {"name": cl_id, "type": "CustomLocation"} + _write_extended_location_file(extended_location) + + print_success(f"Cluster '{cluster_name}' is ready for Workload Orchestration") + print_detail("Custom Location ID", cl_id) + print() + + return { + "clusterName": cluster_name, + "customLocationId": cl_id, + "extensionId": extension_id, + "extendedLocation": extended_location, + "connectedClusterId": connected_cluster_id, + } + + +# --------------------------------------------------------------------------- +# Pre-flight checks +# --------------------------------------------------------------------------- + +def _preflight_checks(cmd, cluster_name, resource_group): + """Verify cluster is Arc-connected and custom-locations feature enabled.""" + # Check cluster is Arc-connected + try: + cluster_info = invoke_cli_command( + cmd, + ["connectedk8s", 
"show", "-n", cluster_name, "-g", resource_group] + ) + except CLIInternalError: + raise ValidationError( + f"Cluster '{cluster_name}' is not Arc-connected or not found " + f"in resource group '{resource_group}'.", + recommendation=( + f"Run: az connectedk8s connect -g {resource_group} " + f"-n {cluster_name} -l <location>" + ) + ) + + connected_cluster_id = cluster_info.get("id", "") + if not connected_cluster_id: + raise CLIInternalError( + f"Could not get resource ID for cluster '{cluster_name}'." + ) + + # Check custom-locations feature enabled + features = cluster_info.get("features", {}) + # Different API versions return this differently + cl_enabled = ( + features.get("customLocationsEnabled", False) + or cluster_info.get("properties", {}).get( + "customLocationsEnabled", False + ) + ) + # If we can't determine, proceed anyway - the custom location + # create step will fail with a clear error if not enabled + if cl_enabled is False: + logger.warning( + "custom-locations feature may not be enabled. 
" + "If custom location creation fails, run: " + "az connectedk8s enable-features -n %s -g %s " + "--features cluster-connect custom-locations", + cluster_name, resource_group + ) + + return connected_cluster_id + + +# --------------------------------------------------------------------------- +# Step 1: cert-manager +# --------------------------------------------------------------------------- + +def _ensure_cert_manager(version, kube_config, kube_context): + """Check if cert-manager is installed; install if missing.""" + try: + from kubernetes import client, config as k8s_config + from kubernetes.client.rest import ApiException + except ImportError: + raise CLIInternalError( + "kubernetes Python package is required.", + recommendation="Run: pip install kubernetes" + ) + + # Load kubeconfig + try: + k8s_config.load_kube_config( + config_file=kube_config, + context=kube_context + ) + except Exception as exc: + raise CLIInternalError( + f"Failed to load kubeconfig: {exc}", + recommendation=( + "Ensure kubectl is configured. " + "Use --kube-config and --kube-context if needed." + ) + ) + + v1 = client.CoreV1Api() + + # Check if cert-manager namespace exists with running pods + try: + v1.read_namespace(CERT_MANAGER_NAMESPACE) + pods = v1.list_namespaced_pod(CERT_MANAGER_NAMESPACE) + running = [ + p for p in pods.items + if p.status and p.status.phase == "Running" + ] + if len(running) >= CERT_MANAGER_MIN_PODS: + print_step( + 1, TOTAL_STEPS, "cert-manager", + f"Already installed [OK] ({len(running)} pods running)" + ) + return + logger.info( + "cert-manager namespace exists but only %d/%d pods running. Reinstalling.", + len(running), CERT_MANAGER_MIN_PODS + ) + except ApiException as exc: + if exc.status != 404: + raise CLIInternalError(f"Failed to check cert-manager: {exc}") + # 404 = namespace doesn't exist, proceed with install + + # Install cert-manager + print_step(1, TOTAL_STEPS, f"cert-manager... 
Installing {version}") + _run_kubectl([ + "apply", "-f", + CERT_MANAGER_MANIFEST_URL.format(version=version), + "--wait" + ], kube_config, kube_context) + + # Wait for webhook to be ready + _run_kubectl([ + "wait", "--for=condition=Available", + f"deployment/{CERT_MANAGER_WEBHOOK_DEPLOYMENT}", + "-n", CERT_MANAGER_NAMESPACE, + f"--timeout={CERT_MANAGER_WAIT_TIMEOUT}" + ], kube_config, kube_context) + + print_step(1, TOTAL_STEPS, "cert-manager", f"Installed {version} [OK]") + + +# --------------------------------------------------------------------------- +# Step 2: trust-manager +# --------------------------------------------------------------------------- + +def _ensure_trust_manager(kube_config, kube_context): + """Check if trust-manager is installed; install via helm if missing.""" + try: + from kubernetes import client, config as k8s_config + from kubernetes.client.rest import ApiException + except ImportError: + raise CLIInternalError( + "kubernetes Python package is required.", + recommendation="Run: pip install kubernetes" + ) + + # Load kubeconfig (may already be loaded from cert-manager step) + try: + k8s_config.load_kube_config( + config_file=kube_config, + context=kube_context + ) + except Exception: + pass # Already loaded, or will fail below + + apps_v1 = client.AppsV1Api() + + # Check if trust-manager deployment exists + try: + apps_v1.read_namespaced_deployment( + TRUST_MANAGER_DEPLOYMENT, CERT_MANAGER_NAMESPACE + ) + print_step(2, TOTAL_STEPS, "trust-manager", "Already installed [OK]") + return + except ApiException as exc: + if exc.status != 404: + raise CLIInternalError(f"Failed to check trust-manager: {exc}") + # 404 = not found, proceed with install + + # Check if helm is available + if not _is_helm_available(): + raise CLIInternalError( + "helm is required to install trust-manager.", + recommendation=( + "Install helm from https://helm.sh/docs/intro/install/ " + "and try again." 
+ ) + ) + + # Install trust-manager via helm + print_step(2, TOTAL_STEPS, "trust-manager... Installing via helm") + + _run_command([ + "helm", "repo", "add", + TRUST_MANAGER_HELM_REPO_NAME, + TRUST_MANAGER_HELM_REPO, + "--force-update" + ]) + + _run_command([ + "helm", "upgrade", TRUST_MANAGER_DEPLOYMENT, + TRUST_MANAGER_HELM_CHART, + "--install", + "--namespace", CERT_MANAGER_NAMESPACE, + "--wait" + ]) + + print_step(2, TOTAL_STEPS, "trust-manager", "Installed [OK]") + + +# --------------------------------------------------------------------------- +# Step 3: WO extension +# --------------------------------------------------------------------------- + +def _ensure_wo_extension( + cmd, cluster_name, resource_group, extension_name, + extension_version, release_train, no_wait, + kube_config=None, kube_context=None +): + """Check if WO extension is installed; install if missing.""" + # Check existing extensions + try: + extensions = invoke_cli_command( + cmd, + [ + "k8s-extension", "list", + "-g", resource_group, + "--cluster-name", cluster_name, + "--cluster-type", "connectedClusters", + ] + ) + except CLIInternalError: + extensions = [] + + # Find WO extension that is actually working + wo_extensions = [ + ext for ext in (extensions or []) + if (ext.get("extensionType", "") or "").lower() + == DEFAULT_EXTENSION_TYPE.lower() + ] + + if wo_extensions: + ext = wo_extensions[0] + ext_id = ext.get("id", "") + ext_ver = ext.get("version", "unknown") + prov_state = ext.get("provisioningState", "").lower() + + if prov_state == "succeeded": + print_step( + 3, TOTAL_STEPS, "WO extension", + f"Already installed [OK] (version {ext_ver})" + ) + return ext_id + + # Install extension + version_msg = f" version {extension_version}" if extension_version else "" + print_step( + 3, TOTAL_STEPS, + f"WO extension... 
Creating '{extension_name}'{version_msg}" + ) + + create_args = [ + "k8s-extension", "create", + "-g", resource_group, + "--cluster-name", cluster_name, + "--cluster-type", "connectedClusters", + "--name", extension_name, + "--extension-type", DEFAULT_EXTENSION_TYPE, + "--scope", DEFAULT_EXTENSION_SCOPE, + "--release-train", release_train, + "--auto-upgrade", "false", + ] + if extension_version: + create_args.extend(["--version", extension_version]) + if no_wait: + create_args.append("--no-wait") + + # Auto-detect storage class and pass as config setting + storage_class = _detect_storage_class(kube_config, kube_context) + if storage_class: + create_args.extend([ + "--configuration-settings", + f"redis.persistentVolume.storageClass={storage_class}", + ]) + + result = invoke_cli_command(cmd, create_args) + ext_id = result.get("id", "") if isinstance(result, dict) else "" + + if no_wait: + print_step(3, TOTAL_STEPS, "WO extension", "Creating (--no-wait) [OK]") + else: + print_step(3, TOTAL_STEPS, "WO extension", "Installed [OK]") + + return ext_id + + +# --------------------------------------------------------------------------- +# Step 4: Custom location +# --------------------------------------------------------------------------- + +def _ensure_custom_location( + cmd, cluster_name, resource_group, location, + custom_location_name, extension_id, connected_cluster_id +): + """Check if custom location exists; create if missing.""" + # Check existing - use REST directly to avoid CLI error output on 404 + sub_id = _get_sub_id(cmd) + cl_arm_url = ( + f"https://management.azure.com/subscriptions" + f"/{sub_id}/resourceGroups/{resource_group}" + f"/providers/Microsoft.ExtendedLocation" + f"/customLocations/{custom_location_name}" + ) + try: + response = send_raw_request( + cmd.cli_ctx, + method="GET", + url=f"{cl_arm_url}?api-version=2021-08-15", + resource="https://management.azure.com" + ) + if response.status_code == 200 and response.text: + cl_info = response.json() + 
cl_id = cl_info.get("id", "") + if cl_id: + print_step( + 4, TOTAL_STEPS, "Custom location", + f"Already exists [OK] ('{custom_location_name}')" + ) + return cl_id + except Exception: + pass # Not found or error, proceed to create + + if not extension_id: + raise CLIInternalError( + "Cannot create custom location: WO extension ID is not available.", + recommendation=( + "Ensure the WO extension was installed successfully. " + "Re-run without --no-wait." + ) + ) + + print_step( + 4, TOTAL_STEPS, + f"Custom location... Creating '{custom_location_name}'" + ) + + try: + result = invoke_cli_command( + cmd, + [ + "customlocation", "create", + "-g", resource_group, + "-n", custom_location_name, + "--cluster-extension-ids", extension_id, + "--host-resource-id", connected_cluster_id, + "--namespace", DEFAULT_EXTENSION_NAMESPACE, + "--location", location, + ] + ) + cl_id = result.get("id", "") if isinstance(result, dict) else "" + except CLIInternalError as exc: + raise CLIInternalError( + f"Failed to create custom location: {exc}", + recommendation=( + "This can happen if the 'custom-locations' feature is not enabled. 
" + f"Run: az connectedk8s enable-features -n {cluster_name} " + f"-g {resource_group} --features cluster-connect custom-locations" + ) + ) + + print_step(4, TOTAL_STEPS, "Custom location", "Created [OK]") + return cl_id + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _detect_storage_class(kube_config=None, kube_context=None): + """Auto-detect the default storage class from the cluster.""" + try: + from kubernetes import client, config as k8s_config + k8s_config.load_kube_config( + config_file=kube_config, context=kube_context + ) + storage_v1 = client.StorageV1Api() + scs = storage_v1.list_storage_class() + # Prefer the default storage class + for sc in scs.items: + annotations = sc.metadata.annotations or {} + if annotations.get("storageclass.kubernetes.io/is-default-class") == "true": + logger.info("Auto-detected default storage class: %s", sc.metadata.name) + return sc.metadata.name + # Fallback: first available storage class + if scs.items: + name = scs.items[0].metadata.name + logger.info("No default storage class found, using first: %s", name) + return name + except Exception as exc: + logger.warning("Could not detect storage class: %s", exc) + return None + + +def _print_diagnostic_summary(step_results, cluster_name, resource_group): + """Print a diagnostic summary showing what succeeded/failed. + + This gives the DRI/support engineer a quick picture of where things + went wrong when a customer reports an issue. 
+ """ + from datetime import datetime, timezone + + print("\n" + "=" * 60) + print(" Diagnostic Summary") + print(f" Cluster: {cluster_name}") + print(f" Resource Group: {resource_group}") + print(f" Timestamp: {datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')}") + print("=" * 60) + + for step_name, result in step_results.items(): + if "FAILED" in result: + icon = "[FAIL]" + elif result == "Skipped": + icon = "○" + else: + icon = "[OK]" + print(f" {icon} {step_name}: {result}") + + has_failure = any("FAILED" in v for v in step_results.values()) + if has_failure: + print("\n [WARN] One or more steps failed. See error details above.") + print(" Re-run the command to retry - completed steps will be skipped.") + print("=" * 60 + "\n") + + +def _write_extended_location_file(extended_location): + """Write extended-location.json to the current working directory.""" + filepath = os.path.join(os.getcwd(), "extended-location.json") + with open(filepath, "w", encoding="utf-8") as f: + json.dump(extended_location, f, indent=2) + print(f"\n File written: {filepath}") + + +def _run_kubectl(args, kube_config=None, kube_context=None): + """Run a kubectl command with optional kubeconfig/context.""" + cmd_args = ["kubectl"] + if kube_config: + cmd_args.extend(["--kubeconfig", kube_config]) + if kube_context: + cmd_args.extend(["--context", kube_context]) + cmd_args.extend(args) + + logger.debug("Running: %s", " ".join(cmd_args)) + result = subprocess.run( # pylint: disable=subprocess-run-check + cmd_args, + capture_output=True, + encoding="utf-8", + errors="replace", + timeout=600, + ) + if result.returncode != 0: + error_msg = result.stderr.strip() or result.stdout.strip() + raise CLIInternalError( + f"kubectl command failed: {' '.join(args)}\n{error_msg}", + recommendation="Ensure kubectl is installed and cluster is reachable." 
+ ) + return result.stdout + + +def _run_command(cmd_args): + """Run an arbitrary command (e.g., helm).""" + logger.debug("Running: %s", " ".join(cmd_args)) + result = subprocess.run( # pylint: disable=subprocess-run-check + cmd_args, + capture_output=True, + encoding="utf-8", + errors="replace", + timeout=600, + ) + if result.returncode != 0: + error_msg = result.stderr.strip() or result.stdout.strip() + raise CLIInternalError( + f"Command failed: {' '.join(cmd_args)}\n{error_msg}" + ) + return result.stdout + + +def _is_helm_available(): + """Check if helm is available in PATH.""" + try: + result = subprocess.run( # pylint: disable=subprocess-run-check + ["helm", "version", "--short"], + capture_output=True, + text=True, + timeout=10, + ) + return result.returncode == 0 + except FileNotFoundError: + return False + + +def _get_sub_id(cmd): + """Get subscription ID from CLI context.""" + try: + from azure.cli.core._profile import Profile + profile = Profile(cli_ctx=cmd.cli_ctx) + sub = profile.get_subscription() + return sub.get("id", "") + except Exception: + return "" diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/target_sg_link.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/target_sg_link.py new file mode 100644 index 00000000000..19b243a31e5 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/target_sg_link.py @@ -0,0 +1,112 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Service Group link helper — links a target to a service group after creation. 

After creating the ServiceGroupMember relationship, a target update (PUT) is
mandatory to refresh the target's hierarchy info. Without this, the target
appears unlinked in portal. (Confirmed from BVT code LinkServiceGroup().)

Usage (called internally by target create --service-group):
    link_target_to_service_group(cmd, target_id, service_group_name)
"""

# pylint: disable=broad-exception-caught

import json
import logging

from azure.cli.core.azclierror import CLIInternalError

from azext_workload_orchestration.onboarding.consts import (
    ARM_ENDPOINT,
    SG_MEMBER_API_VERSION,
    TARGET_API_VERSION,
)
from azext_workload_orchestration.onboarding.utils import (
    invoke_cli_command,
)

logger = logging.getLogger(__name__)


def link_target_to_service_group(cmd, target_id, service_group_name):
    """Link a target to a service group and refresh hierarchy.

    Two REST calls:
    1. PUT {targetId}/providers/Microsoft.Relationships/serviceGroupMember/{sgName}
    2. PUT {targetId} (update target to refresh hierarchy — MANDATORY)

    :param cmd: CLI command context (supplies cli_ctx to the child 'az rest' calls).
    :param target_id: full ARM resource ID of the target to link.
    :param service_group_name: name of the service group to link the target to.
    :raises CLIInternalError: if the ServiceGroupMember PUT (step 1) fails;
        a step-2 failure is only logged as a warning (see note at bottom).
    """
    sg_member_url = (
        f"{ARM_ENDPOINT}{target_id}"
        f"/providers/Microsoft.Relationships/serviceGroupMember/{service_group_name}"
    )

    # Step 1: Create ServiceGroupMember relationship
    try:
        invoke_cli_command(cmd, [
            "rest",
            "--method", "put",
            "--url", f"{sg_member_url}?api-version={SG_MEMBER_API_VERSION}",
            "--body", json.dumps({
                "properties": {
                    "targetId": f"/providers/Microsoft.Management/serviceGroups/{service_group_name}"
                }
            }),
            "--resource", ARM_ENDPOINT,
            "--header", "Content-Type=application/json",
        ], expect_json=False)
        logger.info("ServiceGroupMember created: %s -> %s", target_id, service_group_name)
    except Exception as exc:
        # The recommendation is a copy-pasteable 'az rest' command so the
        # user can retry the exact same call by hand.
        raise CLIInternalError(
            f"Failed to link target to service group '{service_group_name}': {exc}",
            recommendation=(
                f"Try manually:\n"
                f" az rest --method put "
                f"--url \"{sg_member_url}?api-version={SG_MEMBER_API_VERSION}\" "
                f"--body \"{{\\\"properties\\\":{{\\\"targetId\\\":\\\""
                f"/providers/Microsoft.Management/serviceGroups/{service_group_name}"
                f"\\\"}}}}\" "
                f"--resource {ARM_ENDPOINT} --header Content-Type=application/json"
            )
        )

    # Step 2: Update target to refresh hierarchy (MANDATORY)
    try:
        # GET current target
        target_data = invoke_cli_command(cmd, [
            "rest",
            "--method", "get",
            "--url", f"{ARM_ENDPOINT}{target_id}?api-version={TARGET_API_VERSION}",
            "--resource", ARM_ENDPOINT,
        ])

        # PUT target (update to refresh hierarchy)
        if target_data and isinstance(target_data, dict):
            # Strip read-only fields, preserve writable top-level fields
            body = {
                "location": target_data.get("location", ""),
                "properties": target_data.get("properties", {}),
            }
            if "extendedLocation" in target_data:
                body["extendedLocation"] = target_data["extendedLocation"]
            if "tags" in target_data:
                body["tags"] = target_data["tags"]

            invoke_cli_command(cmd, [
                "rest",
                "--method", "put",
                "--url", f"{ARM_ENDPOINT}{target_id}?api-version={TARGET_API_VERSION}",
                "--body", json.dumps(body),
                "--resource", ARM_ENDPOINT,
                "--header", "Content-Type=application/json",
            ], expect_json=False)
            logger.info("Target hierarchy refreshed after SG link")

    except Exception as exc:
        # Deliberately non-fatal: the link itself (step 1) already succeeded;
        # only the hierarchy display may lag until the next target update.
        logger.warning(
            "Target hierarchy refresh after SG link may have failed: %s. "
            "Target may appear unlinked until next update.", exc
        )
diff --git a/src/workload-orchestration/azext_workload_orchestration/onboarding/utils.py b/src/workload-orchestration/azext_workload_orchestration/onboarding/utils.py
new file mode 100644
index 00000000000..35463c36f19
--- /dev/null
+++ b/src/workload-orchestration/azext_workload_orchestration/onboarding/utils.py
@@ -0,0 +1,170 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Shared utilities for onboarding simplification commands. + +Provides REST wrappers (using send_raw_request for automatic auth/retry/throttle), +LRO polling with Retry-After support, CLI command invocation, and progress output. +""" + +# pylint: disable=broad-exception-caught + +import json +import logging + +from azure.cli.core.azclierror import CLIInternalError + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# CmdProxy - bridge between AAZ hooks and helpers expecting cmd.cli_ctx +# --------------------------------------------------------------------------- + +class CmdProxy: + """Lightweight proxy to pass CLI context where a full cmd object is expected. + + AAZ-generated commands don't expose a cmd object in hooks, but many + helper functions expect cmd.cli_ctx. This proxy bridges the gap. + """ + def __init__(self, cli_ctx): + self.cli_ctx = cli_ctx + + +# --------------------------------------------------------------------------- +# ARM ID parsing +# --------------------------------------------------------------------------- + +def parse_arm_id(arm_id): + """Parse an ARM resource ID into a dict of segment name → value. + + Example: + parse_arm_id("/subscriptions/abc/resourceGroups/myRG/providers/Microsoft.Edge/contexts/myCtx") + → {"subscriptions": "abc", "resourcegroups": "myRG", "contexts": "myCtx"} + + Keys are lowercased for case-insensitive lookup. + Returns empty dict if arm_id is None or empty. 
+ """ + if not arm_id: + return {} + parts = arm_id.strip("/").split("/") + result = {} + i = 0 + while i < len(parts) - 1: + result[parts[i].lower()] = parts[i + 1] + i += 2 + return result + + +# --------------------------------------------------------------------------- +# Silent CLI invocation +# --------------------------------------------------------------------------- + +def invoke_silent(cli_args): + """Invoke an az CLI command silently (suppress all stdout/stderr). + + Returns the exit code. Useful for fire-and-forget operations + where you don't need the output (e.g., setting config, creating + resources via 'az rest'). + """ + from azure.cli.core import get_default_cli + import io + import sys + + cli = get_default_cli() + old_stdout, old_stderr = sys.stdout, sys.stderr + sys.stdout = io.StringIO() + sys.stderr = io.StringIO() + try: + return cli.invoke(cli_args) + finally: + sys.stdout, sys.stderr = old_stdout, old_stderr + + +# --------------------------------------------------------------------------- +# CLI command invocation +# --------------------------------------------------------------------------- + +def invoke_cli_command(cmd, command_args, expect_json=True): + """Invoke another az CLI command in-process (shares auth context). + + Uses get_default_cli().invoke() so the child command shares + the same auth session, telemetry, and CLI context. + + Returns parsed JSON result if expect_json=True, raw result otherwise. + Raises CLIInternalError on non-zero exit. 
+ """ + from azure.cli.core import get_default_cli + import io + import sys + + cli = get_default_cli() + if expect_json and "-o" not in command_args and "--output" not in command_args: + command_args = list(command_args) + ["-o", "json"] + + logger.debug("Invoking: az %s", " ".join(command_args)) + + # Suppress stdout/stderr from child command to avoid raw JSON noise + old_stdout = sys.stdout + old_stderr = sys.stderr + captured_out = io.StringIO() + captured_err = io.StringIO() + sys.stdout = captured_out + sys.stderr = captured_err + try: + exit_code = cli.invoke(command_args, out_file=captured_out) + except TypeError: + # Older CLI versions may not support out_file + exit_code = cli.invoke(command_args) + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + if exit_code != 0: + err_text = captured_err.getvalue().strip() + # cli.result may contain the error object from the CLI framework + cli_error = "" + if hasattr(cli, 'result') and hasattr(cli.result, 'error'): + cli_error = str(cli.result.error) if cli.result.error else "" + full_error = cli_error or err_text or f"exit code {exit_code}" + cmd_str = f"az {' '.join(command_args)}" + raise CLIInternalError(f"{full_error}\nCommand: {cmd_str}") + + result = cli.result.result + if expect_json and isinstance(result, str): + try: + return json.loads(result) + except (json.JSONDecodeError, TypeError): + pass + return result + + +# --------------------------------------------------------------------------- +# Progress output +# --------------------------------------------------------------------------- + +def print_step(step_num, total, message, status=""): + """Print a formatted step indicator. + + Examples: + [1/4] Installing cert-manager... + [1/4] Installing cert-manager... [OK] + [1/4] Installing cert-manager... Already installed [OK] + """ + prefix = f"[{step_num}/{total}]" + if status: + print(f"{prefix} {message}... 
{status}") + else: + print(f"{prefix} {message}...") + + +def print_success(message): + """Print a success summary line.""" + print(f"\n[OK] {message}") + + +def print_detail(label, value): + """Print a detail line (indented).""" + print(f" {label}: {value}") diff --git a/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/__init__.py b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/__init__.py new file mode 100644 index 00000000000..a9989856d22 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/__init__.py @@ -0,0 +1,10 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +"""Unit tests for onboarding simplification commands. + +All tests use mocking — no live Azure/K8s calls. +Run: python -m pytest azext_workload_orchestration/tests/test_onboarding/ -v +""" diff --git a/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_hierarchy_create.py b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_hierarchy_create.py new file mode 100644 index 00000000000..52369e3db66 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_hierarchy_create.py @@ -0,0 +1,182 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- + +"""Unit tests for hierarchy create command.""" + +import unittest +from unittest.mock import patch, MagicMock + +from azure.cli.core.azclierror import ValidationError + + +class TestHierarchyCreateValidation(unittest.TestCase): + """Test input validation for hierarchy create.""" + + def _get_mock_cmd(self): + cmd = MagicMock() + cmd.cli_ctx = MagicMock() + return cmd + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + def test_name_too_long_raises_error(self, _, __): + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + with self.assertRaises(ValidationError) as ctx: + hierarchy_create( + cmd, name='this-name-is-way-too-long-for-config', # 36 chars + resource_group='rg1', location='eastus', + level_label='Region', skip_context=True, + ) + + self.assertIn('24', str(ctx.exception)) + self.assertIn('36', str(ctx.exception)) + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + def test_name_exactly_24_passes(self, _, __): + """24-char name should not raise validation error (may fail at API call).""" + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + # This will pass validation but fail at API call (which we mock) + with patch('azext_workload_orchestration.onboarding.hierarchy_create._arm_put_quiet'): + with patch('azext_workload_orchestration.onboarding.hierarchy_create.invoke_cli_command'): + try: + hierarchy_create( + cmd, name='exactly-twenty-four-ch', # 24 chars + resource_group='rg1', 
location='eastus', + level_label='Region', skip_context=True, + ) + except Exception: + pass # May fail at later steps, that's fine + + +class TestHierarchyCreateFlow(unittest.TestCase): + """Test the SG → Site → Config → ConfigRef flow.""" + + def _get_mock_cmd(self): + cmd = MagicMock() + cmd.cli_ctx = MagicMock() + return cmd + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._arm_put_quiet') + @patch('azext_workload_orchestration.onboarding.hierarchy_create.invoke_cli_command') + def test_happy_path_skip_context(self, mock_invoke, mock_put, _, __): + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + result = hierarchy_create( + cmd, name='my-factory', resource_group='rg1', location='eastus', + level_label='Factory', skip_context=True, + ) + + self.assertEqual(result['name'], 'my-factory') + self.assertEqual(result['levelLabel'], 'Factory') + self.assertIn('serviceGroupId', result) + self.assertIn('siteId', result) + self.assertIn('configurationId', result) + # 4 PUT calls: SG, Site, Config, ConfigRef + self.assertEqual(mock_put.call_count, 4) + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._arm_put_quiet') + @patch('azext_workload_orchestration.onboarding.hierarchy_create.invoke_cli_command') + def test_parent_sets_correct_parent_id(self, mock_invoke, mock_put, _, __): + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + result = hierarchy_create( + 
cmd, name='my-factory', resource_group='rg1', location='eastus', + level_label='Factory', parent='my-region', skip_context=True, + ) + + # SG PUT should have parent = /providers/Microsoft.Management/serviceGroups/my-region + sg_call = mock_put.call_args_list[0] + sg_body = sg_call[0][2] # positional: cmd, url, body, api_version + self.assertEqual( + sg_body['properties']['parent']['resourceId'], + '/providers/Microsoft.Management/serviceGroups/my-region' + ) + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._arm_put_quiet') + @patch('azext_workload_orchestration.onboarding.hierarchy_create.invoke_cli_command') + def test_no_parent_uses_tenant_root(self, mock_invoke, mock_put, _, __): + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + result = hierarchy_create( + cmd, name='my-region', resource_group='rg1', location='eastus', + level_label='Region', skip_context=True, + ) + + sg_call = mock_put.call_args_list[0] + sg_body = sg_call[0][2] + self.assertEqual( + sg_body['properties']['parent']['resourceId'], + '/providers/Microsoft.Management/serviceGroups/test-tenant' + ) + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._arm_put_quiet') + @patch('azext_workload_orchestration.onboarding.hierarchy_create.invoke_cli_command') + def test_with_context_auto_creation(self, mock_invoke, mock_put, _, __): + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + # context 
current returns existing context + mock_invoke.return_value = {"name": "existing-ctx", "resourceGroup": "ctx-rg"} + + result = hierarchy_create( + cmd, name='my-region', resource_group='rg1', location='eastus', + level_label='Region', + ) + + self.assertEqual(result['contextName'], 'existing-ctx') + # contextAutoCreated is True when context was found (not explicitly provided) + self.assertTrue(result['contextAutoCreated']) + + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_sub_id', + return_value='test-sub') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._get_tenant_id', + return_value='test-tenant') + @patch('azext_workload_orchestration.onboarding.hierarchy_create._arm_put_quiet') + @patch('azext_workload_orchestration.onboarding.hierarchy_create.invoke_cli_command') + def test_site_url_uses_regional_endpoint(self, mock_invoke, mock_put, _, __): + from azext_workload_orchestration.onboarding.hierarchy_create import hierarchy_create + cmd = self._get_mock_cmd() + + hierarchy_create( + cmd, name='my-region', resource_group='rg1', location='westeurope', + level_label='Region', skip_context=True, + ) + + # Site PUT (2nd call) should use regional URL + site_call = mock_put.call_args_list[1] + site_url = site_call[0][1] # positional: cmd, url, body, api + self.assertIn('westeurope.management.azure.com', site_url) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_sg_link_and_utils.py b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_sg_link_and_utils.py new file mode 100644 index 00000000000..a4a203cdf61 --- /dev/null +++ b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_sg_link_and_utils.py @@ -0,0 +1,94 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

"""Unit tests for service group link helper."""

import unittest
from unittest.mock import patch, MagicMock

from azure.cli.core.azclierror import CLIInternalError


class TestServiceGroupLink(unittest.TestCase):
    """Test target-to-service-group linking."""

    def _get_mock_cmd(self):
        # Minimal stand-in for the CLI cmd object: only cli_ctx is read.
        cmd = MagicMock()
        cmd.cli_ctx = MagicMock()
        return cmd

    @patch('azext_workload_orchestration.onboarding.target_sg_link.invoke_cli_command')
    def test_link_creates_member_and_refreshes_target(self, mock_invoke):
        """Happy path issues SGMember PUT, GET target, then PUT target — in that order."""
        from azext_workload_orchestration.onboarding.target_sg_link import (
            link_target_to_service_group
        )
        cmd = self._get_mock_cmd()

        # GET target returns existing data
        # (side_effect order must mirror the helper's call order exactly)
        mock_invoke.side_effect = [
            None,  # SGMember PUT
            {  # GET target
                "location": "eastus",
                "properties": {"displayName": "t1"},
                "extendedLocation": {"name": "cl1", "type": "CustomLocation"},
            },
            None,  # PUT target (refresh)
        ]

        link_target_to_service_group(cmd, '/sub/rg/targets/t1', 'my-factory')

        # Should have 3 calls: SGMember PUT, GET target, PUT target
        self.assertEqual(mock_invoke.call_count, 3)

        # Verify SGMember PUT URL contains service group name
        # (args[1] is the az-rest argument list handed to invoke_cli_command)
        sg_call_args = mock_invoke.call_args_list[0][0][1]
        self.assertTrue(any('serviceGroupMember/my-factory' in a for a in sg_call_args))

    @patch('azext_workload_orchestration.onboarding.target_sg_link.invoke_cli_command')
    def test_link_failure_raises_cli_error(self, mock_invoke):
        """A failed SGMember PUT surfaces as CLIInternalError naming the service group."""
        from azext_workload_orchestration.onboarding.target_sg_link import (
            link_target_to_service_group
        )
        cmd = self._get_mock_cmd()

        mock_invoke.side_effect = CLIInternalError("SG not found")

        with self.assertRaises(CLIInternalError) as ctx:
            link_target_to_service_group(cmd, '/sub/rg/targets/t1', 'bad-sg')

        self.assertIn('bad-sg', str(ctx.exception))


class TestUtils(unittest.TestCase):
    """Test shared utilities."""

    # The print_* helpers are smoke-tested only: no assertions on output,
    # just that formatting does not raise.

    def test_print_step_with_status(self):
        from azext_workload_orchestration.onboarding.utils import print_step
        # Should not raise
        print_step(1, 4, "Installing cert-manager", "✓")

    def test_print_step_without_status(self):
        from azext_workload_orchestration.onboarding.utils import print_step
        print_step(2, 4, "Installing trust-manager")

    def test_print_success(self):
        from azext_workload_orchestration.onboarding.utils import print_success
        print_success("All done")

    def test_consts_values(self):
        """Pin the published constant values so accidental edits are caught."""
        from azext_workload_orchestration.onboarding.consts import (
            MAX_HIERARCHY_NAME_LENGTH,
            LRO_TIMEOUT_SECONDS,
            DEFAULT_CERT_MANAGER_VERSION,
            DEFAULT_EXTENSION_TYPE,
        )
        self.assertEqual(MAX_HIERARCHY_NAME_LENGTH, 24)
        self.assertEqual(LRO_TIMEOUT_SECONDS, 600)
        self.assertEqual(DEFAULT_CERT_MANAGER_VERSION, 'v1.15.3')
        self.assertEqual(DEFAULT_EXTENSION_TYPE, 'Microsoft.workloadorchestration')


if __name__ == '__main__':
    unittest.main()
diff --git a/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_target_prepare.py b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_target_prepare.py
new file mode 100644
index 00000000000..2ea32aef82c
--- /dev/null
+++ b/src/workload-orchestration/azext_workload_orchestration/tests/test_onboarding/test_target_prepare.py
@@ -0,0 +1,152 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

"""Unit tests for target prepare command."""

import unittest
from unittest.mock import patch, MagicMock

from azure.cli.core.azclierror import CLIInternalError, ValidationError


class TestTargetPreparePreFlight(unittest.TestCase):
    """Test pre-flight checks for target prepare."""

    def _get_mock_cmd(self):
        # Minimal stand-in for the CLI cmd object: only cli_ctx is read.
        cmd = MagicMock()
        cmd.cli_ctx = MagicMock()
        return cmd

    @patch('azext_workload_orchestration.onboarding.target_prepare.invoke_cli_command')
    def test_cluster_not_arc_connected_raises_error(self, mock_invoke):
        """A failed connectedCluster lookup is reported as a ValidationError."""
        from azext_workload_orchestration.onboarding.target_prepare import _preflight_checks
        cmd = self._get_mock_cmd()

        mock_invoke.side_effect = CLIInternalError("Not found")

        with self.assertRaises(ValidationError) as ctx:
            _preflight_checks(cmd, 'my-cluster', 'my-rg')

        self.assertIn('not Arc-connected', str(ctx.exception))

    @patch('azext_workload_orchestration.onboarding.target_prepare.invoke_cli_command')
    def test_arc_connected_returns_cluster_id(self, mock_invoke):
        """A successful lookup returns the connected cluster's ARM ID."""
        from azext_workload_orchestration.onboarding.target_prepare import _preflight_checks
        cmd = self._get_mock_cmd()

        mock_invoke.return_value = {
            "id": "/subscriptions/sub1/resourceGroups/rg1/providers/Microsoft.Kubernetes/connectedClusters/my-cluster",
            "name": "my-cluster",
        }

        result = _preflight_checks(cmd, 'my-cluster', 'my-rg')
        self.assertIn('connectedClusters/my-cluster', result)


class TestTargetPrepareCertManager(unittest.TestCase):
    """Test cert-manager detection."""

    def test_cert_manager_function_exists(self):
        """Verify _ensure_cert_manager function is importable."""
        from azext_workload_orchestration.onboarding.target_prepare import _ensure_cert_manager
        self.assertTrue(callable(_ensure_cert_manager))


class TestTargetPrepareHelm(unittest.TestCase):
    """Test helm detection."""

    @patch('subprocess.run')
    def test_helm_available(self, mock_run):
        """Exit code 0 from 'helm version' means helm is available."""
        from azext_workload_orchestration.onboarding.target_prepare import _is_helm_available
        mock_run.return_value = MagicMock(returncode=0)
        self.assertTrue(_is_helm_available())

    @patch('subprocess.run')
    def test_helm_not_available(self, mock_run):
        """A missing helm binary (FileNotFoundError) means not available."""
        from azext_workload_orchestration.onboarding.target_prepare import _is_helm_available
        mock_run.side_effect = FileNotFoundError()
        self.assertFalse(_is_helm_available())


class TestTargetPrepareExtension(unittest.TestCase):
    """Test WO extension detection and install."""

    def _get_mock_cmd(self):
        cmd = MagicMock()
        cmd.cli_ctx = MagicMock()
        return cmd

    @patch('azext_workload_orchestration.onboarding.target_prepare.invoke_cli_command')
    @patch('azext_workload_orchestration.onboarding.target_prepare._detect_storage_class',
           return_value='default')
    def test_extension_already_installed_succeeds_skips(self, _, mock_invoke):
        """A Succeeded extension is reused: list only, no create call."""
        from azext_workload_orchestration.onboarding.target_prepare import _ensure_wo_extension
        cmd = self._get_mock_cmd()

        mock_invoke.return_value = [
            {
                "extensionType": "microsoft.workloadorchestration",
                "id": "/sub/rg/ext/wo-ext",
                "version": "2.1.11",
                "provisioningState": "Succeeded",
            }
        ]

        # Positional args: cmd, cluster, rg, ext name, version, release
        # train, force — assumed from call sites; TODO confirm signature.
        result = _ensure_wo_extension(
            cmd, 'cluster1', 'rg1', 'wo-ext', None, 'preview', False
        )

        self.assertEqual(result, '/sub/rg/ext/wo-ext')
        # Only list was called, not create
        mock_invoke.assert_called_once()

    @patch('azext_workload_orchestration.onboarding.target_prepare.invoke_cli_command')
    @patch('azext_workload_orchestration.onboarding.target_prepare._detect_storage_class',
           return_value='default')
    def test_failed_extension_gets_deleted_and_reinstalled(self, _, mock_invoke):
        """A Failed extension triggers delete + reinstall (list, delete, create)."""
        from azext_workload_orchestration.onboarding.target_prepare import _ensure_wo_extension
        cmd = self._get_mock_cmd()

        # Stateful side_effect: responses keyed to call order, so the
        # sequence below must mirror the helper's internal call order.
        call_count = [0]
        def side_effect(*args, **kwargs):
            call_count[0] += 1
            if call_count[0] == 1:
                # First call: list returns failed extension
                return [{
                    "extensionType": "microsoft.workloadorchestration",
                    "id": "/sub/rg/ext/wo-ext",
                    "name": "wo-ext",
                    "version": "2.1.11",
                    "provisioningState": "Failed",
                }]
            elif call_count[0] == 2:
                # Second call: delete
                return None
            else:
                # Third call: create
                return {"id": "/sub/rg/ext/wo-ext-new"}

        mock_invoke.side_effect = side_effect

        result = _ensure_wo_extension(
            cmd, 'cluster1', 'rg1', 'wo-ext', None, 'preview', False
        )

        # Should have called: list, delete, create
        self.assertEqual(mock_invoke.call_count, 3)


class TestTargetPrepareStorageClass(unittest.TestCase):
    """Test storage class auto-detection."""

    def test_detect_returns_none_without_cluster(self):
        """Without a real cluster, should return None gracefully."""
        from azext_workload_orchestration.onboarding.target_prepare import _detect_storage_class
        result = _detect_storage_class("/nonexistent/kubeconfig", "bad-context")
        self.assertIsNone(result)


if __name__ == '__main__':
    unittest.main()