From 87480d9456c1e01fb14db1a909b4a65feed74059 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:32:09 +0000 Subject: [PATCH 01/21] aks: support BYO VNet for Automatic SKU HOBO clusters and fix upgrade crash Add --system-node-subnet-id, --node-subnet-id, --disable-hosted-system to 'az aks create'. When the subnet trio (system-node, node, apiserver) is supplied on --sku automatic, the cluster is created with an MC hosted_system_profile carrying BYO subnets; the Enabled flag is left unset so the server decides the default. --disable-hosted-system deterministically opts an Automatic cluster out of HOBO. Validate the BYO VNet trio up front: - Partial trio -> RequiredArgumentMissingError listing missing flags. - Trio without --sku automatic -> RequiredArgumentMissingError. - --disable-hosted-system + any subnet flag -> MutuallyExclusiveArgumentError. - --disable-hosted-system without --sku automatic -> RequiredArgumentMissingError. Fix 'az aks upgrade' / 'az aks scale' crash on HOBO clusters where agent_pool_profiles can be None server-side ('NoneType is not iterable'). Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../azure/cli/command_modules/acs/_help.py | 19 +++ .../azure/cli/command_modules/acs/_params.py | 8 ++ .../cli/command_modules/acs/_validators.py | 8 ++ .../azure/cli/command_modules/acs/custom.py | 10 +- .../acs/managed_cluster_decorator.py | 116 ++++++++++++++++++ 5 files changed, 158 insertions(+), 3 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 7a89268ed9f..ed234297773 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -345,6 +345,25 @@ - name: --apiserver-subnet-id type: string short-summary: The ID of a subnet in an existing VNet into which to assign control plane apiserver pods(requires --enable-apiserver-vnet-integration) + - name: --system-node-subnet-id + type: string + short-summary: (Automatic SKU) Subnet ID of an existing VNet for the hosted system node pool (BYO VNet HOBO). + long-summary: | + When provided alongside `--node-subnet-id` and `--apiserver-subnet-id` on `--sku automatic`, + the cluster is created with BYO VNet for its Hosted Overlay System Pool. All three subnets + must be supplied together. + - name: --node-subnet-id + type: string + short-summary: (Automatic SKU) Subnet ID of an existing VNet for user node pools (BYO VNet HOBO). + long-summary: | + Used together with `--system-node-subnet-id` and `--apiserver-subnet-id` on `--sku automatic` + to bring your own VNet for the cluster. + - name: --disable-hosted-system + type: bool + short-summary: (Automatic SKU) Deterministically opt this cluster out of the Hosted Overlay System Pool (HOBO). + long-summary: | + Only valid on `--sku automatic`. Cannot be combined with `--system-node-subnet-id` + or `--node-subnet-id`. - name: --enable-private-cluster type: string short-summary: Enable private cluster. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index 49e2d17f585..f5fe583adee 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -133,6 +133,7 @@ validate_disable_windows_outbound_nat, validate_asm_egress_name, validate_crg_id, validate_apiserver_subnet_id, + validate_system_node_subnet_id, validate_node_subnet_id, validate_azure_service_mesh_revision, validate_message_of_the_day, validate_custom_ca_trust_certificates, @@ -443,6 +444,13 @@ def load_arguments(self, _): c.argument('enable_private_cluster', action='store_true') c.argument('enable_apiserver_vnet_integration', action='store_true') c.argument('apiserver_subnet_id', validator=validate_apiserver_subnet_id) + c.argument( + 'system_node_subnet_id', + options_list=['--system-node-subnet-id', '--sys-node-subnet-id'], + validator=validate_system_node_subnet_id, + ) + c.argument('node_subnet_id', validator=validate_node_subnet_id) + c.argument('disable_hosted_system', action='store_true') c.argument('private_dns_zone') c.argument('disable_public_fqdn', action='store_true') c.argument('service_principal') diff --git a/src/azure-cli/azure/cli/command_modules/acs/_validators.py b/src/azure-cli/azure/cli/command_modules/acs/_validators.py index 45ab88989d9..1a45d9ff49d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_validators.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_validators.py @@ -455,6 +455,14 @@ def validate_apiserver_subnet_id(namespace): _validate_subnet_id(namespace.apiserver_subnet_id, "--apiserver-subnet-id") +def validate_system_node_subnet_id(namespace): + _validate_subnet_id(namespace.system_node_subnet_id, "--system-node-subnet-id") + + +def validate_node_subnet_id(namespace): + _validate_subnet_id(namespace.node_subnet_id, "--node-subnet-id") + + def _validate_subnet_id(subnet_id, name): if subnet_id is None or subnet_id == '': return diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index b0d7ece1360..abe059d871e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1032,6 +1032,10 @@ def aks_create( # apiserver vnet integration enable_apiserver_vnet_integration=False, apiserver_subnet_id=None, + # BYO VNet HOBO (Automatic SKU) + system_node_subnet_id=None, + node_subnet_id=None, + disable_hosted_system=False, # node provisioning node_provisioning_mode=None, node_provisioning_default_pools=None, @@ -1273,7 +1277,7 @@ def aks_upgrade(cmd, instance = client.get(resource_group_name, name) vmas_cluster = False - for agent_profile in instance.agent_pool_profiles: + for agent_profile in (instance.agent_pool_profiles or []): if agent_profile.type.lower() == "availabilityset": vmas_cluster = True break @@ -1290,7 +1294,7 @@ def aks_upgrade(cmd, # This only provide convenience for customer at client side so they can run az aks upgrade to upgrade all # nodepools of a cluster. The SDK only support upgrade single nodepool at a time. - for agent_pool_profile in instance.agent_pool_profiles: + for agent_pool_profile in (instance.agent_pool_profiles or []): if vmas_cluster: raise CLIError('This cluster is using AvailabilitySet. Node image upgrade only operation ' 'can only be applied on VirtualMachineScaleSets or VirtualMachines cluster.') @@ -1354,7 +1358,7 @@ def aks_upgrade(cmd, return None if upgrade_all: - for agent_profile in instance.agent_pool_profiles: + for agent_profile in (instance.agent_pool_profiles or []): agent_profile.orchestrator_version = kubernetes_version agent_profile.creation_data = None diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index b24a7262f56..bdcc14f441e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4240,6 +4240,85 @@ def get_apiserver_subnet_id(self) -> Union[str, None]: """ return self._get_apiserver_subnet_id(enable_validation=True) + def get_system_node_subnet_id(self) -> Union[str, None]: + """Obtain the value of system_node_subnet_id (BYO VNet HOBO). + + :return: str or None + """ + system_node_subnet_id = self.raw_param.get("system_node_subnet_id") + if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + self.mc.hosted_system_profile and + getattr(self.mc.hosted_system_profile, "system_node_subnet_id", None) is not None + ): + system_node_subnet_id = self.mc.hosted_system_profile.system_node_subnet_id + return system_node_subnet_id + + def get_node_subnet_id(self) -> Union[str, None]: + """Obtain the value of node_subnet_id (BYO VNet HOBO). + + :return: str or None + """ + node_subnet_id = self.raw_param.get("node_subnet_id") + if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + self.mc.hosted_system_profile and + getattr(self.mc.hosted_system_profile, "node_subnet_id", None) is not None + ): + node_subnet_id = self.mc.hosted_system_profile.node_subnet_id + return node_subnet_id + + def get_disable_hosted_system(self) -> bool: + """Obtain the value of disable_hosted_system. + + :return: bool + """ + return bool(self.raw_param.get("disable_hosted_system")) + + def _validate_byo_hobo_subnets(self) -> None: + """Validate BYO HOBO subnet trio and mutual exclusion with --disable-hosted-system. + + - If any of system-node / node / apiserver subnet is set, all three must be provided. + - --disable-hosted-system cannot be combined with any of the three subnets. + - BYO HOBO (subnet trio) requires --sku automatic. + """ + if self.decorator_mode != DecoratorMode.CREATE: + return + system_node_subnet_id = self.raw_param.get("system_node_subnet_id") + node_subnet_id = self.raw_param.get("node_subnet_id") + apiserver_subnet_id = self.raw_param.get("apiserver_subnet_id") + disable_hosted_system = self.get_disable_hosted_system() + + any_set = any([system_node_subnet_id, node_subnet_id]) + if disable_hosted_system and any_set: + raise MutuallyExclusiveArgumentError( + '"--disable-hosted-system" cannot be combined with ' + '"--system-node-subnet-id" or "--node-subnet-id".' + ) + if any_set: + missing = [] + if not system_node_subnet_id: + missing.append("--system-node-subnet-id") + if not node_subnet_id: + missing.append("--node-subnet-id") + if not apiserver_subnet_id: + missing.append("--apiserver-subnet-id") + if missing: + raise RequiredArgumentMissingError( + "BYO VNet for Automatic (HOBO) clusters requires all three subnets. " + "Missing: " + ", ".join(missing) + "." + ) + if self.get_sku_name() != CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: + raise RequiredArgumentMissingError( + '"--system-node-subnet-id" / "--node-subnet-id" require "--sku automatic".' + ) + if disable_hosted_system and self.get_sku_name() != CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: + raise RequiredArgumentMissingError( + '"--disable-hosted-system" requires "--sku automatic".' + ) + def _get_enable_private_cluster(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_private_cluster. @@ -7004,6 +7083,41 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster mc.fqdn_subdomain = fqdn_subdomain return mc + def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: + """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. + + - When any of `--system-node-subnet-id` / `--node-subnet-id` / `--apiserver-subnet-id` + are provided (BYO VNet HOBO), validate the trio and populate + `mc.hosted_system_profile.{system_node_subnet_id, node_subnet_id}`. `enabled` is left + unset so the server side keeps ownership of the default/opt-in decision. + - When `--disable-hosted-system` is provided, set + `mc.hosted_system_profile = ManagedClusterHostedSystemProfile(enabled=False)` so + HOBO is deterministically opted out for Automatic clusters. + + :return: the ManagedCluster object + """ + self._ensure_mc(mc) + + # Run cross-flag validation (mutual exclusion + trio completeness + SKU gate) + self.context._validate_byo_hobo_subnets() + + system_node_subnet_id = self.context.get_system_node_subnet_id() + node_subnet_id = self.context.get_node_subnet_id() + disable_hosted_system = self.context.get_disable_hosted_system() + + if disable_hosted_system: + mc.hosted_system_profile = self.models.ManagedClusterHostedSystemProfile(enabled=False) + return mc + + if system_node_subnet_id or node_subnet_id: + if mc.hosted_system_profile is None: + mc.hosted_system_profile = self.models.ManagedClusterHostedSystemProfile() + if system_node_subnet_id: + mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id + if node_subnet_id: + mc.hosted_system_profile.node_subnet_id = node_subnet_id + return mc + def set_up_identity(self, mc: ManagedCluster) -> ManagedCluster: """Set up identity for the ManagedCluster object. @@ -7484,6 +7598,8 @@ def construct_mc_profile_default(self, bypass_restore_defaults: bool = False) -> mc = self.set_up_oidc_issuer_profile(mc) # set up api server access profile and fqdn subdomain mc = self.set_up_api_server_access_profile(mc) + # set up hosted system profile (BYO VNet HOBO) + mc = self.set_up_hosted_system_profile(mc) # set up identity mc = self.set_up_identity(mc) # set up identity profile From 997ac9c16a52676ed121f82f194f9e326dbbd28a Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:38:08 +0000 Subject: [PATCH 02/21] test: add unit tests for BYO HOBO validator Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../latest/test_managed_cluster_decorator.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 4d73ef0e470..2c3397c32fa 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -4426,6 +4426,95 @@ def test_get_apiserver_subnet_id(self): with self.assertRaises(RequiredArgumentMissingError): ctx_6.get_apiserver_subnet_id() + def test_byo_hobo_subnets_validation(self): + system_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/sys" + node_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/nod" + api_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/api" + + # disable + subnet -> MutuallyExclusiveArgumentError + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "disable_hosted_system": True, + "system_node_subnet_id": system_subnet, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + with self.assertRaises(MutuallyExclusiveArgumentError): + ctx._validate_byo_hobo_subnets() + + # partial trio -> RequiredArgumentMissingError + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "system_node_subnet_id": system_subnet, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + with self.assertRaises(RequiredArgumentMissingError): + ctx._validate_byo_hobo_subnets() + + # trio without --sku automatic -> RequiredArgumentMissingError + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "base", + "system_node_subnet_id": system_subnet, + "node_subnet_id": node_subnet, + "apiserver_subnet_id": api_subnet, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + with self.assertRaises(RequiredArgumentMissingError): + ctx._validate_byo_hobo_subnets() + + # disable_hosted_system without automatic -> RequiredArgumentMissingError + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "base", + "disable_hosted_system": True, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + with self.assertRaises(RequiredArgumentMissingError): + ctx._validate_byo_hobo_subnets() + + # happy path: full trio + automatic + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "system_node_subnet_id": system_subnet, + "node_subnet_id": node_subnet, + "apiserver_subnet_id": api_subnet, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx._validate_byo_hobo_subnets() + self.assertEqual(ctx.get_system_node_subnet_id(), system_subnet) + self.assertEqual(ctx.get_node_subnet_id(), node_subnet) + + # happy path: disable + automatic + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "disable_hosted_system": True, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx._validate_byo_hobo_subnets() + self.assertTrue(ctx.get_disable_hosted_system()) + def test_get_private_dns_zone(self): # default ctx_1 = AKSManagedClusterContext( From f91ce35304abecee16a7b8db37114bc29db57e18 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:50:31 +0000 Subject: [PATCH 03/21] aks: address review feedback for BYO HOBO PR * Simplify --system-node-subnet-id registration (drop the --sys-node-subnet-id alias so the linter picks up help correctly). * Relax _get_apiserver_subnet_id CREATE-time check: don't require --vnet-subnet-id when BYO HOBO subnets are set, since system-node/node subnets replace vnet-subnet-id on --sku automatic. * Run _validate_byo_hobo_subnets up front in set_up_api_server_access_profile so the targeted "require --sku automatic" error beats the generic --apiserver-subnet-id messaging. * Also fix aks_scale against HOBO clusters where agent_pool_profiles is None (same crash Qizhe hit with aks_upgrade): guard with `or []` and return a user-friendly error for empty pools. * Add linter_exclusions entries for the three new parameters (missing_parameter_test_coverage) to keep azdev-linter green without recorded scenario tests at this stage. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../azure/cli/command_modules/acs/_params.py | 6 +---- .../azure/cli/command_modules/acs/custom.py | 11 +++++--- .../command_modules/acs/linter_exclusions.yml | 9 +++++++ .../acs/managed_cluster_decorator.py | 27 ++++++++++++++++--- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index f5fe583adee..0e7854db1df 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -444,11 +444,7 @@ def load_arguments(self, _): c.argument('enable_private_cluster', action='store_true') c.argument('enable_apiserver_vnet_integration', action='store_true') c.argument('apiserver_subnet_id', validator=validate_apiserver_subnet_id) - c.argument( - 'system_node_subnet_id', - options_list=['--system-node-subnet-id', '--sys-node-subnet-id'], - validator=validate_system_node_subnet_id, - ) + c.argument('system_node_subnet_id', validator=validate_system_node_subnet_id) c.argument('node_subnet_id', validator=validate_node_subnet_id) c.argument('disable_hosted_system', action='store_true') c.argument('private_dns_zone') diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index abe059d871e..eae687536c1 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1442,12 +1442,17 @@ def _upgrade_single_nodepool_image_version(no_wait, client, resource_group_name, def aks_scale(cmd, client, resource_group_name, name, node_count, nodepool_name="", no_wait=False): instance = client.get(resource_group_name, name) - if len(instance.agent_pool_profiles) > 1 and nodepool_name == "": + agent_pool_profiles = instance.agent_pool_profiles or [] + if not agent_pool_profiles: + raise CLIError('The cluster has no scalable node pools (this may be a Hosted System Pool / Automatic cluster). ' + 'Use az aks nodepool add/scale against a user node pool instead.') + + if len(agent_pool_profiles) > 1 and nodepool_name == "": raise CLIError('There are more than one node pool in the cluster. ' 'Please specify nodepool name or use az aks nodepool command to scale node pool') - for agent_profile in instance.agent_pool_profiles: - if agent_profile.name == nodepool_name or (nodepool_name == "" and len(instance.agent_pool_profiles) == 1): + for agent_profile in agent_pool_profiles: + if agent_profile.name == nodepool_name or (nodepool_name == "" and len(agent_pool_profiles) == 1): if agent_profile.enable_auto_scaling: raise CLIError( "Cannot scale cluster autoscaler enabled node pool.") diff --git a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml index 39692b2f608..e52a4216d73 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml @@ -1,6 +1,15 @@ --- aks create: parameters: + system_node_subnet_id: + rule_exclusions: + - missing_parameter_test_coverage + node_subnet_id: + rule_exclusions: + - missing_parameter_test_coverage + disable_hosted_system: + rule_exclusions: + - missing_parameter_test_coverage appgw_watch_namespace: rule_exclusions: - option_length_too_long diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index bdcc14f441e..f3b4c0682be 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4211,7 +4211,14 @@ def _get_apiserver_subnet_id(self, enable_validation: bool = False) -> Union[str if enable_validation: if self.decorator_mode == DecoratorMode.CREATE: vnet_subnet_id = self.get_vnet_subnet_id() - if apiserver_subnet_id and vnet_subnet_id is None: + # For BYO VNet HOBO (--sku automatic with subnet trio), --vnet-subnet-id is + # not used: system-node / node subnets replace it. Only require --vnet-subnet-id + # when neither --system-node-subnet-id nor --node-subnet-id is provided. + byo_hobo_subnets_set = ( + self.raw_param.get("system_node_subnet_id") or + self.raw_param.get("node_subnet_id") + ) + if apiserver_subnet_id and vnet_subnet_id is None and not byo_hobo_subnets_set: raise RequiredArgumentMissingError( '"--apiserver-subnet-id" requires "--vnet-subnet-id".') @@ -4291,13 +4298,20 @@ def _validate_byo_hobo_subnets(self) -> None: apiserver_subnet_id = self.raw_param.get("apiserver_subnet_id") disable_hosted_system = self.get_disable_hosted_system() - any_set = any([system_node_subnet_id, node_subnet_id]) - if disable_hosted_system and any_set: + hobo_specific_set = bool(system_node_subnet_id or node_subnet_id) + any_trio_set = bool(hobo_specific_set or apiserver_subnet_id) + + # --disable-hosted-system is mutually exclusive with any HOBO-specific subnet flag. + # (We deliberately don't include --apiserver-subnet-id here: it keeps its existing + # general-purpose meaning for --enable-apiserver-vnet-integration flows.) + if disable_hosted_system and hobo_specific_set: raise MutuallyExclusiveArgumentError( '"--disable-hosted-system" cannot be combined with ' '"--system-node-subnet-id" or "--node-subnet-id".' ) - if any_set: + + # Partial trio: if any HOBO-specific subnet is set, require the full trio. + if hobo_specific_set: missing = [] if not system_node_subnet_id: missing.append("--system-node-subnet-id") @@ -4318,6 +4332,7 @@ def _validate_byo_hobo_subnets(self) -> None: raise RequiredArgumentMissingError( '"--disable-hosted-system" requires "--sku automatic".' ) + _ = any_trio_set # reserved for future per-flag gating def _get_enable_private_cluster(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_private_cluster. @@ -7057,6 +7072,10 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster """ self._ensure_mc(mc) + # Run BYO HOBO trio validation first so clearer errors surface before the + # generic --apiserver-subnet-id checks inside _get_apiserver_subnet_id. + self.context._validate_byo_hobo_subnets() + api_server_access_profile = None api_server_authorized_ip_ranges = self.context.get_api_server_authorized_ip_ranges() enable_private_cluster = self.context.get_enable_private_cluster() From a229daa341f44d3b2d1ae20f6e6f16a4f3d83498 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:38:36 +0000 Subject: [PATCH 04/21] aks: fix BYO VNet HOBO create to produce RP-accepted payload When customers pass --system-node-subnet-id / --node-subnet-id / --apiserver-subnet-id on --sku automatic to bring their own VNet for HOBO, the CLI was producing a payload the RP rejected: 1. apiServerAccessProfile.enableVnetIntegration was not set, so the RP treated the cluster as default-VNet while subnetId was populated and returned ApiserverSubnetConfigError. Auto-wire enable_vnet_integration whenever the BYO HOBO subnet trio is present. 2. hostedSystemProfile.enabled was left unset, so the RP could not distinguish BYO HOBO from default mode. Set enabled=True when the subnet trio is provided. 3. agentPoolProfiles contained the default system pool, which the RP rejected because HOBO manages node pools itself. Clear agent_pool_profiles in BYO HOBO mode, matching the preview path. 4. outbound_type defaulted to managedNATGateway for Automatic SKU, which the RP disallows on BYO VNet. Keep the user's explicit value (or let it default to loadBalancer) when the BYO trio is provided. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index f3b4c0682be..0f3c779ab12 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2407,7 +2407,19 @@ def _get_outbound_type( skuName = self.get_sku_name() isVnetSubnetIdEmpty = self.get_vnet_subnet_id() in ["", None] - if skuName is not None and skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and isVnetSubnetIdEmpty: + # For BYO VNet HOBO (Automatic SKU with system-node/node subnet trio), the user's + # subnet IDs replace --vnet-subnet-id; don't force ManagedNATGateway in that case. + byo_hobo_subnets_set = bool( + self.raw_param.get("system_node_subnet_id") or + self.raw_param.get("node_subnet_id") + ) + if ( + skuName is not None and + skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and + isVnetSubnetIdEmpty and + not byo_hobo_subnets_set and + self.raw_param.get("outbound_type") is None + ): # outbound_type of Automatic SKU should be ManagedNATGateway if no subnet id provided. outbound_type = CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY @@ -7096,6 +7108,14 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster if api_server_access_profile is None: api_server_access_profile = self.models.ManagedClusterAPIServerAccessProfile() api_server_access_profile.subnet_id = self.context.get_apiserver_subnet_id() + # BYO VNet HOBO (Automatic SKU) requires apiserver VNet integration. When the + # BYO HOBO subnet trio is provided, auto-enable vnet integration so users are + # not forced to pass --enable-apiserver-vnet-integration alongside the subnet IDs. + if ( + self.context.get_system_node_subnet_id() or + self.context.get_node_subnet_id() + ): + api_server_access_profile.enable_vnet_integration = True mc.api_server_access_profile = api_server_access_profile fqdn_subdomain = self.context.get_fqdn_subdomain() @@ -7131,10 +7151,18 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: if system_node_subnet_id or node_subnet_id: if mc.hosted_system_profile is None: mc.hosted_system_profile = self.models.ManagedClusterHostedSystemProfile() + # BYO VNet HOBO requires explicit enablement so the RP treats this as + # a BYO VNet cluster (not default-vnet) when BYO subnets are supplied. + mc.hosted_system_profile.enabled = True if system_node_subnet_id: mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id if node_subnet_id: mc.hosted_system_profile.node_subnet_id = node_subnet_id + # The HOBO server manages node pools; drop the default agent pool so the + # RP doesn't reject the request for having an unrelated default nodepool + # in a VNet other than the BYO HOBO trio's VNet. + if mc.agent_pool_profiles is not None: + mc.agent_pool_profiles = None return mc def set_up_identity(self, mc: ManagedCluster) -> ManagedCluster: From 4de3d9a1ad795507809980dee05c2d26cc9a4200 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:06:52 +0000 Subject: [PATCH 05/21] aks: skip agent-pool update step for HOBO clusters on aks update HOBO (Automatic SKU Hosted Overlay System Pool) clusters have agent_pool_profiles=null on the RP side because node pools are server-managed. update_agentpool_profile was raising 'Encounter an unexpected error while getting agent pool profiles...' on any 'az aks update' against a HOBO cluster (including 'az aks update --sku base' for Automatic-to-Base downgrade). Skip that step when hostedSystemProfile.enabled is true. Also refines the Automatic-SKU outbound-type override: keep the existing 'default to ManagedNATGateway when no user value and no vnet subnet' behavior unchanged; the BYO-HOBO exemption added in the prior commit is already enough. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../cli/command_modules/acs/managed_cluster_decorator.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 0f3c779ab12..be1b8b0d0d7 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2417,8 +2417,7 @@ def _get_outbound_type( skuName is not None and skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and isVnetSubnetIdEmpty and - not byo_hobo_subnets_set and - self.raw_param.get("outbound_type") is None + not byo_hobo_subnets_set ): # outbound_type of Automatic SKU should be ManagedNATGateway if no subnet id provided. outbound_type = CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY @@ -8147,6 +8146,12 @@ def update_agentpool_profile(self, mc: ManagedCluster) -> ManagedCluster: """ self._ensure_mc(mc) + # HOBO (Hosted Overlay System Pool) clusters manage node pools on the + # server side and surface `agent_pool_profiles` as None. Skip the + # default agent pool update in that case. + if mc.hosted_system_profile and mc.hosted_system_profile.enabled: + return mc + if not mc.agent_pool_profiles: raise UnknownError( "Encounter an unexpected error while getting agent pool profiles from the cluster in the process of " From 03ddbc9527f64f2dceed16727712d43cf595bf15 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:12:57 +0000 Subject: [PATCH 06/21] aks: clarify BYO HOBO validator docstring and drop dead var Address Copilot review feedback on PR #33259: - Clarify _validate_byo_hobo_subnets docstring: BYO VNet HOBO is triggered only by --system-node-subnet-id / --node-subnet-id. --apiserver-subnet-id keeps its existing general-purpose meaning for --enable-apiserver-vnet-integration flows on non-HOBO clusters, so it is deliberately not part of the trigger or the mutual-exclusion set. - Remove the unused 'any_trio_set' placeholder variable. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index be1b8b0d0d7..9786b8c5f1f 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4298,9 +4298,17 @@ def get_disable_hosted_system(self) -> bool: def _validate_byo_hobo_subnets(self) -> None: """Validate BYO HOBO subnet trio and mutual exclusion with --disable-hosted-system. - - If any of system-node / node / apiserver subnet is set, all three must be provided. - - --disable-hosted-system cannot be combined with any of the three subnets. - - BYO HOBO (subnet trio) requires --sku automatic. + BYO VNet HOBO is triggered by --system-node-subnet-id / --node-subnet-id + (the HOBO-specific flags). --apiserver-subnet-id is intentionally NOT part of the + trigger because it keeps its existing general-purpose meaning for + --enable-apiserver-vnet-integration flows on non-HOBO clusters. + + - If either --system-node-subnet-id or --node-subnet-id is set, the full trio + (--system-node-subnet-id, --node-subnet-id, --apiserver-subnet-id) must be + provided and --sku must be automatic. + - --disable-hosted-system is mutually exclusive with the HOBO-specific + subnet flags (--system-node-subnet-id, --node-subnet-id) and also requires + --sku automatic. """ if self.decorator_mode != DecoratorMode.CREATE: return @@ -4310,7 +4318,6 @@ def _validate_byo_hobo_subnets(self) -> None: disable_hosted_system = self.get_disable_hosted_system() hobo_specific_set = bool(system_node_subnet_id or node_subnet_id) - any_trio_set = bool(hobo_specific_set or apiserver_subnet_id) # --disable-hosted-system is mutually exclusive with any HOBO-specific subnet flag. # (We deliberately don't include --apiserver-subnet-id here: it keeps its existing @@ -4343,7 +4350,6 @@ def _validate_byo_hobo_subnets(self) -> None: raise RequiredArgumentMissingError( '"--disable-hosted-system" requires "--sku automatic".' ) - _ = any_trio_set # reserved for future per-flag gating def _get_enable_private_cluster(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_private_cluster. From 930a55e459d90bbd80ab4a62ca3923cc65e90775 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:46:25 +0000 Subject: [PATCH 07/21] aks: rename _validate_byo_hobo_subnets to validate_byo_hobo_subnets Fix pylint W0212 (protected-access) reported by CI: the validator is called across classes (AKSManagedClusterCreateDecorator accessing AKSManagedClusterContext), so it should be a public method. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../command_modules/acs/managed_cluster_decorator.py | 6 +++--- .../tests/latest/test_managed_cluster_decorator.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 9786b8c5f1f..5c219e31f6d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4295,7 +4295,7 @@ def get_disable_hosted_system(self) -> bool: """ return bool(self.raw_param.get("disable_hosted_system")) - def _validate_byo_hobo_subnets(self) -> None: + def validate_byo_hobo_subnets(self) -> None: """Validate BYO HOBO subnet trio and mutual exclusion with --disable-hosted-system. BYO VNet HOBO is triggered by --system-node-subnet-id / --node-subnet-id @@ -7091,7 +7091,7 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster # Run BYO HOBO trio validation first so clearer errors surface before the # generic --apiserver-subnet-id checks inside _get_apiserver_subnet_id. - self.context._validate_byo_hobo_subnets() + self.context.validate_byo_hobo_subnets() api_server_access_profile = None api_server_authorized_ip_ranges = self.context.get_api_server_authorized_ip_ranges() @@ -7143,7 +7143,7 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: self._ensure_mc(mc) # Run cross-flag validation (mutual exclusion + trio completeness + SKU gate) - self.context._validate_byo_hobo_subnets() + self.context.validate_byo_hobo_subnets() system_node_subnet_id = self.context.get_system_node_subnet_id() node_subnet_id = self.context.get_node_subnet_id() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 2c3397c32fa..45b234cce76 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -4443,7 +4443,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(MutuallyExclusiveArgumentError): - ctx._validate_byo_hobo_subnets() + ctx.validate_byo_hobo_subnets() # partial trio -> RequiredArgumentMissingError ctx = AKSManagedClusterContext( @@ -4456,7 +4456,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(RequiredArgumentMissingError): - ctx._validate_byo_hobo_subnets() + ctx.validate_byo_hobo_subnets() # trio without --sku automatic -> RequiredArgumentMissingError ctx = AKSManagedClusterContext( @@ -4471,7 +4471,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(RequiredArgumentMissingError): - ctx._validate_byo_hobo_subnets() + ctx.validate_byo_hobo_subnets() # disable_hosted_system without automatic -> RequiredArgumentMissingError ctx = AKSManagedClusterContext( @@ -4484,7 +4484,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(RequiredArgumentMissingError): - ctx._validate_byo_hobo_subnets() + ctx.validate_byo_hobo_subnets() # happy path: full trio + automatic ctx = AKSManagedClusterContext( @@ -4498,7 +4498,7 @@ def test_byo_hobo_subnets_validation(self): self.models, decorator_mode=DecoratorMode.CREATE, ) - ctx._validate_byo_hobo_subnets() + ctx.validate_byo_hobo_subnets() self.assertEqual(ctx.get_system_node_subnet_id(), system_subnet) self.assertEqual(ctx.get_node_subnet_id(), node_subnet) @@ -4512,7 +4512,7 @@ def test_byo_hobo_subnets_validation(self): self.models, decorator_mode=DecoratorMode.CREATE, ) - ctx._validate_byo_hobo_subnets() + ctx.validate_byo_hobo_subnets() self.assertTrue(ctx.get_disable_hosted_system()) def test_get_private_dns_zone(self): From 69049ddbfec27aee3e7e7233a84cc2762db411a3 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:49:35 +0000 Subject: [PATCH 08/21] aks: refresh set_up_hosted_system_profile docstring Docstring still described the earlier 'enabled left unset' behavior, but the code now sets enabled=True on BYO VNet HOBO trio (required so the RP treats the request as BYO rather than default-VNet mode) and clears agent_pool_profiles because HOBO manages node pools server-side. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../command_modules/acs/managed_cluster_decorator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 5c219e31f6d..af03fe79e27 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -7130,10 +7130,12 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. - - When any of `--system-node-subnet-id` / `--node-subnet-id` / `--apiserver-subnet-id` - are provided (BYO VNet HOBO), validate the trio and populate - `mc.hosted_system_profile.{system_node_subnet_id, node_subnet_id}`. `enabled` is left - unset so the server side keeps ownership of the default/opt-in decision. + - When the BYO VNet HOBO trio (`--system-node-subnet-id` / `--node-subnet-id` / + `--apiserver-subnet-id`) is provided, populate + `mc.hosted_system_profile.{enabled=True, system_node_subnet_id, node_subnet_id}` + and clear `mc.agent_pool_profiles` because HOBO manages node pools server-side. + The RP requires `enabled=True` to treat the request as BYO VNet rather than + default-VNet mode. - When `--disable-hosted-system` is provided, set `mc.hosted_system_profile = ManagedClusterHostedSystemProfile(enabled=False)` so HOBO is deterministically opted out for Automatic clusters. From ae6d22f3b0f6c749232627a5030ccaf1b5db69ca Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 05:03:51 +0000 Subject: [PATCH 09/21] aks: remove --disable-hosted-system and rename HOBO terminology Per review feedback: - Remove --disable-hosted-system flag entirely (PM decision). - Rename user-visible HOBO / Hosted Overlay System Pool terminology to Managed System Pool for Automatic cluster. - Drop associated getter, validator branch, param, linter exclusion, and related test cases. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../azure/cli/command_modules/acs/_help.py | 14 +-- .../azure/cli/command_modules/acs/_params.py | 1 - .../azure/cli/command_modules/acs/custom.py | 7 +- .../command_modules/acs/linter_exclusions.yml | 3 - .../acs/managed_cluster_decorator.py | 112 +++++++----------- .../latest/test_managed_cluster_decorator.py | 40 ------- 6 files changed, 48 insertions(+), 129 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index ed234297773..0ec67e96be1 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -347,23 +347,17 @@ short-summary: The ID of a subnet in an existing VNet into which to assign control plane apiserver pods(requires --enable-apiserver-vnet-integration) - name: --system-node-subnet-id type: string - short-summary: (Automatic SKU) Subnet ID of an existing VNet for the hosted system node pool (BYO VNet HOBO). + short-summary: (Automatic SKU) Subnet ID of an existing VNet for the Managed System Pool for Automatic cluster. long-summary: | When provided alongside `--node-subnet-id` and `--apiserver-subnet-id` on `--sku automatic`, - the cluster is created with BYO VNet for its Hosted Overlay System Pool. All three subnets - must be supplied together. + the cluster is created with a bring-your-own VNet for its Managed System Pool. All three + subnets must be supplied together. - name: --node-subnet-id type: string - short-summary: (Automatic SKU) Subnet ID of an existing VNet for user node pools (BYO VNet HOBO). + short-summary: (Automatic SKU) Subnet ID of an existing VNet for user node pools of an Automatic cluster. long-summary: | Used together with `--system-node-subnet-id` and `--apiserver-subnet-id` on `--sku automatic` to bring your own VNet for the cluster. - - name: --disable-hosted-system - type: bool - short-summary: (Automatic SKU) Deterministically opt this cluster out of the Hosted Overlay System Pool (HOBO). - long-summary: | - Only valid on `--sku automatic`. Cannot be combined with `--system-node-subnet-id` - or `--node-subnet-id`. - name: --enable-private-cluster type: string short-summary: Enable private cluster. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index 0e7854db1df..b8120760b3e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -446,7 +446,6 @@ def load_arguments(self, _): c.argument('apiserver_subnet_id', validator=validate_apiserver_subnet_id) c.argument('system_node_subnet_id', validator=validate_system_node_subnet_id) c.argument('node_subnet_id', validator=validate_node_subnet_id) - c.argument('disable_hosted_system', action='store_true') c.argument('private_dns_zone') c.argument('disable_public_fqdn', action='store_true') c.argument('service_principal') diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index eae687536c1..fa2d0433c54 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1032,10 +1032,9 @@ def aks_create( # apiserver vnet integration enable_apiserver_vnet_integration=False, apiserver_subnet_id=None, - # BYO VNet HOBO (Automatic SKU) + # BYO VNet for Managed System Pool (Automatic SKU) system_node_subnet_id=None, node_subnet_id=None, - disable_hosted_system=False, # node provisioning node_provisioning_mode=None, node_provisioning_default_pools=None, @@ -1444,8 +1443,8 @@ def aks_scale(cmd, client, resource_group_name, name, node_count, nodepool_name= agent_pool_profiles = instance.agent_pool_profiles or [] if not agent_pool_profiles: - raise CLIError('The cluster has no scalable node pools (this may be a Hosted System Pool / Automatic cluster). ' - 'Use az aks nodepool add/scale against a user node pool instead.') + raise CLIError('The cluster has no scalable node pools (this may be a Managed System Pool for ' + 'Automatic cluster). Use az aks nodepool add/scale against a user node pool instead.') if len(agent_pool_profiles) > 1 and nodepool_name == "": raise CLIError('There are more than one node pool in the cluster. ' diff --git a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml index e52a4216d73..b37f38e729b 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml @@ -7,9 +7,6 @@ aks create: node_subnet_id: rule_exclusions: - missing_parameter_test_coverage - disable_hosted_system: - rule_exclusions: - - missing_parameter_test_coverage appgw_watch_namespace: rule_exclusions: - option_length_too_long diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index af03fe79e27..03ff3069ec3 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2407,9 +2407,9 @@ def _get_outbound_type( skuName = self.get_sku_name() isVnetSubnetIdEmpty = self.get_vnet_subnet_id() in ["", None] - # For BYO VNet HOBO (Automatic SKU with system-node/node subnet trio), the user's - # subnet IDs replace --vnet-subnet-id; don't force ManagedNATGateway in that case. - byo_hobo_subnets_set = bool( + # For BYO VNet Managed System Pool (Automatic SKU with system-node/node subnet trio), + # the user's subnet IDs replace --vnet-subnet-id; don't force ManagedNATGateway in that case. + byo_subnets_set = bool( self.raw_param.get("system_node_subnet_id") or self.raw_param.get("node_subnet_id") ) @@ -2417,7 +2417,7 @@ def _get_outbound_type( skuName is not None and skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and isVnetSubnetIdEmpty and - not byo_hobo_subnets_set + not byo_subnets_set ): # outbound_type of Automatic SKU should be ManagedNATGateway if no subnet id provided. outbound_type = CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY @@ -4222,14 +4222,15 @@ def _get_apiserver_subnet_id(self, enable_validation: bool = False) -> Union[str if enable_validation: if self.decorator_mode == DecoratorMode.CREATE: vnet_subnet_id = self.get_vnet_subnet_id() - # For BYO VNet HOBO (--sku automatic with subnet trio), --vnet-subnet-id is - # not used: system-node / node subnets replace it. Only require --vnet-subnet-id - # when neither --system-node-subnet-id nor --node-subnet-id is provided. - byo_hobo_subnets_set = ( + # For BYO VNet Managed System Pool (--sku automatic with subnet trio), + # --vnet-subnet-id is not used: system-node / node subnets replace it. Only + # require --vnet-subnet-id when neither --system-node-subnet-id nor + # --node-subnet-id is provided. + byo_subnets_set = ( self.raw_param.get("system_node_subnet_id") or self.raw_param.get("node_subnet_id") ) - if apiserver_subnet_id and vnet_subnet_id is None and not byo_hobo_subnets_set: + if apiserver_subnet_id and vnet_subnet_id is None and not byo_subnets_set: raise RequiredArgumentMissingError( '"--apiserver-subnet-id" requires "--vnet-subnet-id".') @@ -4259,7 +4260,7 @@ def get_apiserver_subnet_id(self) -> Union[str, None]: return self._get_apiserver_subnet_id(enable_validation=True) def get_system_node_subnet_id(self) -> Union[str, None]: - """Obtain the value of system_node_subnet_id (BYO VNet HOBO). + """Obtain the value of system_node_subnet_id (BYO VNet for Automatic cluster). :return: str or None """ @@ -4274,7 +4275,7 @@ def get_system_node_subnet_id(self) -> Union[str, None]: return system_node_subnet_id def get_node_subnet_id(self) -> Union[str, None]: - """Obtain the value of node_subnet_id (BYO VNet HOBO). + """Obtain the value of node_subnet_id (BYO VNet for Automatic cluster). :return: str or None """ @@ -4288,48 +4289,28 @@ def get_node_subnet_id(self) -> Union[str, None]: node_subnet_id = self.mc.hosted_system_profile.node_subnet_id return node_subnet_id - def get_disable_hosted_system(self) -> bool: - """Obtain the value of disable_hosted_system. - - :return: bool - """ - return bool(self.raw_param.get("disable_hosted_system")) - def validate_byo_hobo_subnets(self) -> None: - """Validate BYO HOBO subnet trio and mutual exclusion with --disable-hosted-system. + """Validate the BYO VNet subnet trio for Managed System Pool (Automatic cluster). - BYO VNet HOBO is triggered by --system-node-subnet-id / --node-subnet-id - (the HOBO-specific flags). --apiserver-subnet-id is intentionally NOT part of the - trigger because it keeps its existing general-purpose meaning for - --enable-apiserver-vnet-integration flows on non-HOBO clusters. + BYO VNet for a Managed System Pool is triggered by --system-node-subnet-id / + --node-subnet-id. --apiserver-subnet-id is intentionally NOT part of the trigger + because it keeps its existing general-purpose meaning for + --enable-apiserver-vnet-integration flows on non-Automatic clusters. - If either --system-node-subnet-id or --node-subnet-id is set, the full trio (--system-node-subnet-id, --node-subnet-id, --apiserver-subnet-id) must be provided and --sku must be automatic. - - --disable-hosted-system is mutually exclusive with the HOBO-specific - subnet flags (--system-node-subnet-id, --node-subnet-id) and also requires - --sku automatic. """ if self.decorator_mode != DecoratorMode.CREATE: return system_node_subnet_id = self.raw_param.get("system_node_subnet_id") node_subnet_id = self.raw_param.get("node_subnet_id") apiserver_subnet_id = self.raw_param.get("apiserver_subnet_id") - disable_hosted_system = self.get_disable_hosted_system() - - hobo_specific_set = bool(system_node_subnet_id or node_subnet_id) - # --disable-hosted-system is mutually exclusive with any HOBO-specific subnet flag. - # (We deliberately don't include --apiserver-subnet-id here: it keeps its existing - # general-purpose meaning for --enable-apiserver-vnet-integration flows.) - if disable_hosted_system and hobo_specific_set: - raise MutuallyExclusiveArgumentError( - '"--disable-hosted-system" cannot be combined with ' - '"--system-node-subnet-id" or "--node-subnet-id".' - ) + byo_specific_set = bool(system_node_subnet_id or node_subnet_id) - # Partial trio: if any HOBO-specific subnet is set, require the full trio. - if hobo_specific_set: + # Partial trio: if any BYO subnet is set, require the full trio. + if byo_specific_set: missing = [] if not system_node_subnet_id: missing.append("--system-node-subnet-id") @@ -4339,17 +4320,13 @@ def validate_byo_hobo_subnets(self) -> None: missing.append("--apiserver-subnet-id") if missing: raise RequiredArgumentMissingError( - "BYO VNet for Automatic (HOBO) clusters requires all three subnets. " - "Missing: " + ", ".join(missing) + "." + "BYO VNet for a Managed System Pool (Automatic cluster) requires all three " + "subnets. Missing: " + ", ".join(missing) + "." ) if self.get_sku_name() != CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: raise RequiredArgumentMissingError( '"--system-node-subnet-id" / "--node-subnet-id" require "--sku automatic".' ) - if disable_hosted_system and self.get_sku_name() != CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: - raise RequiredArgumentMissingError( - '"--disable-hosted-system" requires "--sku automatic".' - ) def _get_enable_private_cluster(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_private_cluster. @@ -7089,7 +7066,7 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster """ self._ensure_mc(mc) - # Run BYO HOBO trio validation first so clearer errors surface before the + # Run BYO VNet trio validation first so clearer errors surface before the # generic --apiserver-subnet-id checks inside _get_apiserver_subnet_id. self.context.validate_byo_hobo_subnets() @@ -7113,9 +7090,10 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster if api_server_access_profile is None: api_server_access_profile = self.models.ManagedClusterAPIServerAccessProfile() api_server_access_profile.subnet_id = self.context.get_apiserver_subnet_id() - # BYO VNet HOBO (Automatic SKU) requires apiserver VNet integration. When the - # BYO HOBO subnet trio is provided, auto-enable vnet integration so users are - # not forced to pass --enable-apiserver-vnet-integration alongside the subnet IDs. + # BYO VNet for Managed System Pool (Automatic SKU) requires apiserver VNet + # integration. When the BYO subnet trio is provided, auto-enable vnet + # integration so users are not forced to pass --enable-apiserver-vnet-integration + # alongside the subnet IDs. if ( self.context.get_system_node_subnet_id() or self.context.get_node_subnet_id() @@ -7130,44 +7108,36 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. - - When the BYO VNet HOBO trio (`--system-node-subnet-id` / `--node-subnet-id` / - `--apiserver-subnet-id`) is provided, populate - `mc.hosted_system_profile.{enabled=True, system_node_subnet_id, node_subnet_id}` - and clear `mc.agent_pool_profiles` because HOBO manages node pools server-side. - The RP requires `enabled=True` to treat the request as BYO VNet rather than - default-VNet mode. - - When `--disable-hosted-system` is provided, set - `mc.hosted_system_profile = ManagedClusterHostedSystemProfile(enabled=False)` so - HOBO is deterministically opted out for Automatic clusters. + When the BYO VNet trio (`--system-node-subnet-id` / `--node-subnet-id` / + `--apiserver-subnet-id`) is provided, populate + `mc.hosted_system_profile.{enabled=True, system_node_subnet_id, node_subnet_id}` + and clear `mc.agent_pool_profiles` because the Managed System Pool manages node + pools server-side. The RP requires `enabled=True` to treat the request as BYO + VNet rather than default-VNet mode. :return: the ManagedCluster object """ self._ensure_mc(mc) - # Run cross-flag validation (mutual exclusion + trio completeness + SKU gate) + # Run cross-flag validation (trio completeness + SKU gate) self.context.validate_byo_hobo_subnets() system_node_subnet_id = self.context.get_system_node_subnet_id() node_subnet_id = self.context.get_node_subnet_id() - disable_hosted_system = self.context.get_disable_hosted_system() - - if disable_hosted_system: - mc.hosted_system_profile = self.models.ManagedClusterHostedSystemProfile(enabled=False) - return mc if system_node_subnet_id or node_subnet_id: if mc.hosted_system_profile is None: mc.hosted_system_profile = self.models.ManagedClusterHostedSystemProfile() - # BYO VNet HOBO requires explicit enablement so the RP treats this as - # a BYO VNet cluster (not default-vnet) when BYO subnets are supplied. + # BYO VNet requires explicit enablement so the RP treats this as a BYO VNet + # cluster (not default-vnet) when BYO subnets are supplied. mc.hosted_system_profile.enabled = True if system_node_subnet_id: mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id if node_subnet_id: mc.hosted_system_profile.node_subnet_id = node_subnet_id - # The HOBO server manages node pools; drop the default agent pool so the - # RP doesn't reject the request for having an unrelated default nodepool - # in a VNet other than the BYO HOBO trio's VNet. + # The Managed System Pool manages node pools; drop the default agent pool so + # the RP doesn't reject the request for having an unrelated default nodepool + # in a VNet other than the BYO trio's VNet. if mc.agent_pool_profiles is not None: mc.agent_pool_profiles = None return mc @@ -7652,7 +7622,7 @@ def construct_mc_profile_default(self, bypass_restore_defaults: bool = False) -> mc = self.set_up_oidc_issuer_profile(mc) # set up api server access profile and fqdn subdomain mc = self.set_up_api_server_access_profile(mc) - # set up hosted system profile (BYO VNet HOBO) + # set up hosted system profile (BYO VNet for Managed System Pool) mc = self.set_up_hosted_system_profile(mc) # set up identity mc = self.set_up_identity(mc) @@ -8154,7 +8124,7 @@ def update_agentpool_profile(self, mc: ManagedCluster) -> ManagedCluster: """ self._ensure_mc(mc) - # HOBO (Hosted Overlay System Pool) clusters manage node pools on the + # Automatic clusters with a Managed System Pool manage node pools on the # server side and surface `agent_pool_profiles` as None. Skip the # default agent pool update in that case. if mc.hosted_system_profile and mc.hosted_system_profile.enabled: diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 45b234cce76..83767fd0203 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -4431,20 +4431,6 @@ def test_byo_hobo_subnets_validation(self): node_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/nod" api_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/api" - # disable + subnet -> MutuallyExclusiveArgumentError - ctx = AKSManagedClusterContext( - self.cmd, - AKSManagedClusterParamDict({ - "sku": "automatic", - "disable_hosted_system": True, - "system_node_subnet_id": system_subnet, - }), - self.models, - decorator_mode=DecoratorMode.CREATE, - ) - with self.assertRaises(MutuallyExclusiveArgumentError): - ctx.validate_byo_hobo_subnets() - # partial trio -> RequiredArgumentMissingError ctx = AKSManagedClusterContext( self.cmd, @@ -4473,19 +4459,6 @@ def test_byo_hobo_subnets_validation(self): with self.assertRaises(RequiredArgumentMissingError): ctx.validate_byo_hobo_subnets() - # disable_hosted_system without automatic -> RequiredArgumentMissingError - ctx = AKSManagedClusterContext( - self.cmd, - AKSManagedClusterParamDict({ - "sku": "base", - "disable_hosted_system": True, - }), - self.models, - decorator_mode=DecoratorMode.CREATE, - ) - with self.assertRaises(RequiredArgumentMissingError): - ctx.validate_byo_hobo_subnets() - # happy path: full trio + automatic ctx = AKSManagedClusterContext( self.cmd, @@ -4502,19 +4475,6 @@ def test_byo_hobo_subnets_validation(self): self.assertEqual(ctx.get_system_node_subnet_id(), system_subnet) self.assertEqual(ctx.get_node_subnet_id(), node_subnet) - # happy path: disable + automatic - ctx = AKSManagedClusterContext( - self.cmd, - AKSManagedClusterParamDict({ - "sku": "automatic", - "disable_hosted_system": True, - }), - self.models, - decorator_mode=DecoratorMode.CREATE, - ) - ctx.validate_byo_hobo_subnets() - self.assertTrue(ctx.get_disable_hosted_system()) - def test_get_private_dns_zone(self): # default ctx_1 = AKSManagedClusterContext( From 6d490a21faca5f6284a8021a14ba2641a2e3a5b9 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 05:27:50 +0000 Subject: [PATCH 10/21] aks: clarify BYO subnet help for Automatic cluster Rework short/long summaries for --system-node-subnet-id and --node-subnet-id so each flag clearly explains which pool it maps to (Managed System Pool vs user node pools) and states that the full three-subnet trio (including --apiserver-subnet-id) must belong to the same VNet and requires --sku automatic. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../azure/cli/command_modules/acs/_help.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 0ec67e96be1..321fd3381a5 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -347,17 +347,20 @@ short-summary: The ID of a subnet in an existing VNet into which to assign control plane apiserver pods(requires --enable-apiserver-vnet-integration) - name: --system-node-subnet-id type: string - short-summary: (Automatic SKU) Subnet ID of an existing VNet for the Managed System Pool for Automatic cluster. + short-summary: (Automatic SKU) Subnet ID of an existing VNet to be used by the Managed System Pool in the Automatic cluster. long-summary: | - When provided alongside `--node-subnet-id` and `--apiserver-subnet-id` on `--sku automatic`, - the cluster is created with a bring-your-own VNet for its Managed System Pool. All three - subnets must be supplied together. + Bring-your-own VNet for an Automatic cluster requires three subnets supplied together: + `--system-node-subnet-id` (this flag, for the Managed System Pool), `--node-subnet-id` + (for user node pools), and `--apiserver-subnet-id` (for the control plane API server). + All three subnets must belong to the same VNet and can only be used with `--sku automatic`. - name: --node-subnet-id type: string - short-summary: (Automatic SKU) Subnet ID of an existing VNet for user node pools of an Automatic cluster. + short-summary: (Automatic SKU) Subnet ID of an existing VNet to be used by user node pools in the Automatic cluster. long-summary: | - Used together with `--system-node-subnet-id` and `--apiserver-subnet-id` on `--sku automatic` - to bring your own VNet for the cluster. + Bring-your-own VNet for an Automatic cluster requires three subnets supplied together: + `--system-node-subnet-id` (for the Managed System Pool), `--node-subnet-id` (this flag, + for user node pools), and `--apiserver-subnet-id` (for the control plane API server). + All three subnets must belong to the same VNet and can only be used with `--sku automatic`. - name: --enable-private-cluster type: string short-summary: Enable private cluster. From f4049ec2ff2578e4e45d7700a05f2d73402de199 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 05:33:29 +0000 Subject: [PATCH 11/21] aks: polish BYO subnet help wording and clarify agent_pool_profiles reset - Rewrite --system-node-subnet-id and --node-subnet-id short summaries to follow the 'The ID of a subnet in an existing VNet to be used by ...' style already used for --vnet-subnet-id. - Rewrite the comment above the BYO-path 'agent_pool_profiles = None' assignment to explain the real reason: on an Automatic cluster with BYO VNet, the RP provisions the system pool from hosted_system_profile, so the CLI-synthesized default agent pool entry conflicts with the BYO trio and must be cleared. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- src/azure-cli/azure/cli/command_modules/acs/_help.py | 4 ++-- .../cli/command_modules/acs/managed_cluster_decorator.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 321fd3381a5..cf1c76bb973 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -347,7 +347,7 @@ short-summary: The ID of a subnet in an existing VNet into which to assign control plane apiserver pods(requires --enable-apiserver-vnet-integration) - name: --system-node-subnet-id type: string - short-summary: (Automatic SKU) Subnet ID of an existing VNet to be used by the Managed System Pool in the Automatic cluster. + short-summary: (Automatic SKU) The ID of a subnet in an existing VNet to be used by the Managed System Pool in an Automatic cluster. long-summary: | Bring-your-own VNet for an Automatic cluster requires three subnets supplied together: `--system-node-subnet-id` (this flag, for the Managed System Pool), `--node-subnet-id` @@ -355,7 +355,7 @@ All three subnets must belong to the same VNet and can only be used with `--sku automatic`. - name: --node-subnet-id type: string - short-summary: (Automatic SKU) Subnet ID of an existing VNet to be used by user node pools in the Automatic cluster. + short-summary: (Automatic SKU) The ID of a subnet in an existing VNet to be used by user node pools in an Automatic cluster. long-summary: | Bring-your-own VNet for an Automatic cluster requires three subnets supplied together: `--system-node-subnet-id` (for the Managed System Pool), `--node-subnet-id` (this flag, diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 03ff3069ec3..3f7699804e1 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -7135,9 +7135,12 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id if node_subnet_id: mc.hosted_system_profile.node_subnet_id = node_subnet_id - # The Managed System Pool manages node pools; drop the default agent pool so - # the RP doesn't reject the request for having an unrelated default nodepool - # in a VNet other than the BYO trio's VNet. + # On an Automatic cluster with BYO VNet, the system pool is provisioned by + # the RP from `hosted_system_profile` (using `system_node_subnet_id`), so + # the CLI-synthesized default `agent_pool_profiles` entry is unnecessary + # and would conflict: its `vnet_subnet_id` is unset (or bound to the + # default VNet), which the RP rejects against the BYO trio. Clear it and + # let the RP populate pools from `hosted_system_profile`. if mc.agent_pool_profiles is not None: mc.agent_pool_profiles = None return mc From 5cf104139b90661765bd9fd768b09bdae4b52f97 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 17:58:03 +0000 Subject: [PATCH 12/21] aks: re-add --enable-hosted-system flag for deterministic HOBO opt-in Give power users an explicit way to request a Managed System Pool on Automatic SKU clusters, independent of the region-level default toggle. - `--enable-hosted-system` sets `hosted_system_profile.enabled=True` and clears the CLI-synthesized default agent pool. This avoids the ghost-pool problem on non-BYO Automatic clusters in toggle-ON regions where the RP auto-enables HOBO but the CLI still ships a default pool. - The BYO VNet subnet trio implies `--enable-hosted-system`, so existing BYO flows keep working unchanged. - `--enable-hosted-system` is gated to `--sku automatic`. Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../azure/cli/command_modules/acs/_help.py | 8 +++ .../azure/cli/command_modules/acs/_params.py | 1 + .../azure/cli/command_modules/acs/custom.py | 1 + .../command_modules/acs/linter_exclusions.yml | 3 + .../acs/managed_cluster_decorator.py | 62 ++++++++++++++----- .../latest/test_managed_cluster_decorator.py | 27 ++++++++ 6 files changed, 85 insertions(+), 17 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index cf1c76bb973..e19e5c64936 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -361,6 +361,14 @@ `--system-node-subnet-id` (for the Managed System Pool), `--node-subnet-id` (this flag, for user node pools), and `--apiserver-subnet-id` (for the control plane API server). All three subnets must belong to the same VNet and can only be used with `--sku automatic`. + - name: --enable-hosted-system + type: bool + short-summary: (Automatic SKU) Explicitly opt in to a Managed System Pool for the Automatic cluster. + long-summary: | + Only valid with `--sku automatic`. Use this flag when you want to deterministically + request a Managed System Pool regardless of region defaults. It is also implied when + you supply the bring-your-own VNet subnet trio (`--system-node-subnet-id`, + `--node-subnet-id`, `--apiserver-subnet-id`). - name: --enable-private-cluster type: string short-summary: Enable private cluster. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index b8120760b3e..17ec6e91a24 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -446,6 +446,7 @@ def load_arguments(self, _): c.argument('apiserver_subnet_id', validator=validate_apiserver_subnet_id) c.argument('system_node_subnet_id', validator=validate_system_node_subnet_id) c.argument('node_subnet_id', validator=validate_node_subnet_id) + c.argument('enable_hosted_system', action='store_true') c.argument('private_dns_zone') c.argument('disable_public_fqdn', action='store_true') c.argument('service_principal') diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index fa2d0433c54..2051e589ea4 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1035,6 +1035,7 @@ def aks_create( # BYO VNet for Managed System Pool (Automatic SKU) system_node_subnet_id=None, node_subnet_id=None, + enable_hosted_system=False, # node provisioning node_provisioning_mode=None, node_provisioning_default_pools=None, diff --git a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml index b37f38e729b..2e991452849 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/acs/linter_exclusions.yml @@ -7,6 +7,9 @@ aks create: node_subnet_id: rule_exclusions: - missing_parameter_test_coverage + enable_hosted_system: + rule_exclusions: + - missing_parameter_test_coverage appgw_watch_namespace: rule_exclusions: - option_length_too_long diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 3f7699804e1..6b576474d2a 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4289,8 +4289,25 @@ def get_node_subnet_id(self) -> Union[str, None]: node_subnet_id = self.mc.hosted_system_profile.node_subnet_id return node_subnet_id + def get_enable_hosted_system(self) -> bool: + """Obtain the value of enable_hosted_system. + + Returns True when the user explicitly opts in via --enable-hosted-system, + or implicitly via the BYO VNet subnet trio (which is HOBO-only). + + :return: bool + """ + if self.decorator_mode != DecoratorMode.CREATE: + return False + explicit = bool(self.raw_param.get("enable_hosted_system")) + implicit = bool( + self.raw_param.get("system_node_subnet_id") or + self.raw_param.get("node_subnet_id") + ) + return explicit or implicit + def validate_byo_hobo_subnets(self) -> None: - """Validate the BYO VNet subnet trio for Managed System Pool (Automatic cluster). + """Validate the BYO VNet subnet trio and the --enable-hosted-system flag. BYO VNet for a Managed System Pool is triggered by --system-node-subnet-id / --node-subnet-id. --apiserver-subnet-id is intentionally NOT part of the trigger @@ -4300,15 +4317,23 @@ def validate_byo_hobo_subnets(self) -> None: - If either --system-node-subnet-id or --node-subnet-id is set, the full trio (--system-node-subnet-id, --node-subnet-id, --apiserver-subnet-id) must be provided and --sku must be automatic. + - --enable-hosted-system is only valid with --sku automatic. """ if self.decorator_mode != DecoratorMode.CREATE: return system_node_subnet_id = self.raw_param.get("system_node_subnet_id") node_subnet_id = self.raw_param.get("node_subnet_id") apiserver_subnet_id = self.raw_param.get("apiserver_subnet_id") + enable_hosted_system = bool(self.raw_param.get("enable_hosted_system")) byo_specific_set = bool(system_node_subnet_id or node_subnet_id) + # --enable-hosted-system requires --sku automatic. + if enable_hosted_system and self.get_sku_name() != CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: + raise RequiredArgumentMissingError( + '"--enable-hosted-system" requires "--sku automatic".' + ) + # Partial trio: if any BYO subnet is set, require the full trio. if byo_specific_set: missing = [] @@ -7108,39 +7133,42 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. - When the BYO VNet trio (`--system-node-subnet-id` / `--node-subnet-id` / - `--apiserver-subnet-id`) is provided, populate - `mc.hosted_system_profile.{enabled=True, system_node_subnet_id, node_subnet_id}` - and clear `mc.agent_pool_profiles` because the Managed System Pool manages node - pools server-side. The RP requires `enabled=True` to treat the request as BYO - VNet rather than default-VNet mode. + Triggered when the user explicitly opts in via `--enable-hosted-system`, or + implicitly by supplying the BYO VNet subnet trio (`--system-node-subnet-id` / + `--node-subnet-id` / `--apiserver-subnet-id`). In either case: + - `mc.hosted_system_profile.enabled` is set to True so the RP treats this + as a Managed System Pool request. + - `system_node_subnet_id` / `node_subnet_id` are populated when supplied. + - `mc.agent_pool_profiles` is cleared. The CLI unconditionally synthesizes + a default agent pool via `set_up_agentpool_profile`; on a Managed System + Pool cluster the system pool is provisioned server-side from + `hosted_system_profile`, so the CLI default is stale and (in the BYO case) + actively conflicts with the BYO VNet. :return: the ManagedCluster object """ self._ensure_mc(mc) - # Run cross-flag validation (trio completeness + SKU gate) + # Run cross-flag validation (--enable-hosted-system SKU gate + BYO trio completeness) self.context.validate_byo_hobo_subnets() system_node_subnet_id = self.context.get_system_node_subnet_id() node_subnet_id = self.context.get_node_subnet_id() + enable_hosted_system = self.context.get_enable_hosted_system() - if system_node_subnet_id or node_subnet_id: + if enable_hosted_system: if mc.hosted_system_profile is None: mc.hosted_system_profile = self.models.ManagedClusterHostedSystemProfile() - # BYO VNet requires explicit enablement so the RP treats this as a BYO VNet - # cluster (not default-vnet) when BYO subnets are supplied. + # Explicit enablement so the RP treats this as a Managed System Pool cluster. mc.hosted_system_profile.enabled = True if system_node_subnet_id: mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id if node_subnet_id: mc.hosted_system_profile.node_subnet_id = node_subnet_id - # On an Automatic cluster with BYO VNet, the system pool is provisioned by - # the RP from `hosted_system_profile` (using `system_node_subnet_id`), so - # the CLI-synthesized default `agent_pool_profiles` entry is unnecessary - # and would conflict: its `vnet_subnet_id` is unset (or bound to the - # default VNet), which the RP rejects against the BYO trio. Clear it and - # let the RP populate pools from `hosted_system_profile`. + # Clear the CLI-synthesized default agent pool — the RP provisions the + # system pool from hosted_system_profile instead. Leaving it in causes + # the RP to reject BYO VNet clusters and produces a ghost pool on + # non-BYO Automatic clusters. if mc.agent_pool_profiles is not None: mc.agent_pool_profiles = None return mc diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 83767fd0203..8149cea25ce 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -4474,6 +4474,33 @@ def test_byo_hobo_subnets_validation(self): ctx.validate_byo_hobo_subnets() self.assertEqual(ctx.get_system_node_subnet_id(), system_subnet) self.assertEqual(ctx.get_node_subnet_id(), node_subnet) + self.assertTrue(ctx.get_enable_hosted_system()) # BYO trio implies enable_hosted_system + + # --enable-hosted-system without --sku automatic -> RequiredArgumentMissingError + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "base", + "enable_hosted_system": True, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + with self.assertRaises(RequiredArgumentMissingError): + ctx.validate_byo_hobo_subnets() + + # happy path: --enable-hosted-system alone on automatic + ctx = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "enable_hosted_system": True, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx.validate_byo_hobo_subnets() + self.assertTrue(ctx.get_enable_hosted_system()) def test_get_private_dns_zone(self): # default From 6aa0aa306e563ff552aa61500962a4a375ac4386 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 21:31:52 +0000 Subject: [PATCH 13/21] aks: grant BYO hosted-system subnet permissions Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 92 ++++++++++-- .../latest/test_managed_cluster_decorator.py | 138 ++++++++++++++++++ 2 files changed, 220 insertions(+), 10 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 6b576474d2a..75dd362e002 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -6454,6 +6454,19 @@ def set_up_service_principal_profile(self, mc: ManagedCluster) -> ManagedCluster mc.service_principal_profile = service_principal_profile return mc + def _get_byo_hosted_system_subnet_ids(self) -> List[str]: + if not self.context.get_enable_hosted_system(): + return [] + + subnet_ids = [] + seen = set() + for raw_key in ("system_node_subnet_id", "node_subnet_id", "apiserver_subnet_id"): + subnet_id = self.context.raw_param.get(raw_key) + if subnet_id and subnet_id not in seen: + subnet_ids.append(subnet_id) + seen.add(subnet_id) + return subnet_ids + def process_add_role_assignment_for_vnet_subnet(self, mc: ManagedCluster) -> None: """Add role assignment for vent subnet. @@ -6470,6 +6483,10 @@ def process_add_role_assignment_for_vnet_subnet(self, mc: ManagedCluster) -> Non """ self._ensure_mc(mc) + # Validate before granting roles so a malformed BYO trio does not leave + # partial Network Contributor assignments behind. + self.context.validate_byo_hobo_subnets() + need_post_creation_vnet_permission_granting = False vnet_subnet_id = self.context.get_vnet_subnet_id() skip_subnet_role_assignment = ( @@ -6515,6 +6532,50 @@ def process_add_role_assignment_for_vnet_subnet(self, mc: ManagedCluster) -> Non logger.warning( "Could not create a role assignment for subnet. Are you an Owner on this subscription?" ) + byo_hosted_system_subnet_ids = self._get_byo_hosted_system_subnet_ids() + if byo_hosted_system_subnet_ids and not skip_subnet_role_assignment: + service_principal_profile = mc.service_principal_profile + assign_identity = self.context.get_assign_identity() + pending_post_creation_subnets = [] + if service_principal_profile is None and not assign_identity: + for subnet_id in byo_hosted_system_subnet_ids: + if not self.context.external_functions.subnet_role_assignment_exists(self.cmd, subnet_id): + pending_post_creation_subnets.append(subnet_id) + if pending_post_creation_subnets: + need_post_creation_vnet_permission_granting = True + self.context.set_intermediate( + "byo_hosted_system_subnets_pending_grant", + pending_post_creation_subnets, + overwrite_exists=True, + ) + else: + identity_object_id = None + if assign_identity: + identity_object_id = self.context.get_user_assigned_identity_object_id() + for subnet_id in byo_hosted_system_subnet_ids: + if self.context.external_functions.subnet_role_assignment_exists(self.cmd, subnet_id): + continue + if assign_identity: + added = self.context.external_functions.add_role_assignment( + self.cmd, + "Network Contributor", + identity_object_id, + is_service_principal=False, + scope=subnet_id, + ) + else: + added = self.context.external_functions.add_role_assignment( + self.cmd, + "Network Contributor", + service_principal_profile.client_id, + scope=subnet_id, + ) + if not added: + logger.warning( + "Could not create a role assignment for subnet %s. " + "Are you an Owner on this subscription?", + subnet_id, + ) # store need_post_creation_vnet_permission_granting as an intermediate self.context.set_intermediate( "need_post_creation_vnet_permission_granting", @@ -7767,16 +7828,27 @@ def immediate_processing_after_request(self, mc: ManagedCluster) -> None: # Grant vnet permission to system assigned identity RIGHT AFTER the cluster is put, this operation can # reduce latency for the role assignment take effect instant_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) - if not self.context.external_functions.add_role_assignment( - self.cmd, - "Network Contributor", - instant_cluster.identity.principal_id, - scope=self.context.get_vnet_subnet_id(), - is_service_principal=False, - ): - logger.warning( - "Could not create a role assignment for subnet. Are you an Owner on this subscription?" - ) + scopes = [] + vnet_subnet_id = self.context.get_vnet_subnet_id() + if vnet_subnet_id: + scopes.append(vnet_subnet_id) + byo_hosted_system_subnet_ids = self.context.get_intermediate( + "byo_hosted_system_subnets_pending_grant", default_value=[] + ) + for subnet_id in byo_hosted_system_subnet_ids or []: + if subnet_id and subnet_id not in scopes: + scopes.append(subnet_id) + for scope in scopes: + if not self.context.external_functions.add_role_assignment( + self.cmd, + "Network Contributor", + instant_cluster.identity.principal_id, + scope=scope, + is_service_principal=False, + ): + logger.warning( + "Could not create a role assignment for subnet. Are you an Owner on this subscription?" + ) # pylint: disable=too-many-locals def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 8149cea25ce..28023dfc894 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -6885,6 +6885,101 @@ def test_process_add_role_assignment_for_vnet_subnet(self): False, ) + # BYO VNet for Managed System Pool with user-assigned identity grants all BYO subnets. + system_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/system" + node_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/node" + api_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/api" + identity_obj = Mock( + principal_id="test_object_id", + ) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.AKSManagedClusterContext.get_identity_by_msi_client", + return_value=identity_obj, + ): + dec_6 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_managed_identity": True, + "sku": "automatic", + "system_node_subnet_id": system_subnet, + "node_subnet_id": node_subnet, + "apiserver_subnet_id": api_subnet, + "skip_subnet_role_assignment": False, + "assign_identity": "test_assign_identity", + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_6 = self.models.ManagedCluster(location="test_location") + dec_6.context.attach_mc(mc_6) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.subnet_role_assignment_exists", + return_value=False, + ), patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.add_role_assignment", + return_value=True, + ) as add_role_assignment: + dec_6.process_add_role_assignment_for_vnet_subnet(mc_6) + add_role_assignment.assert_has_calls([ + call( + self.cmd, + "Network Contributor", + "test_object_id", + is_service_principal=False, + scope=system_subnet, + ), + call( + self.cmd, + "Network Contributor", + "test_object_id", + is_service_principal=False, + scope=node_subnet, + ), + call( + self.cmd, + "Network Contributor", + "test_object_id", + is_service_principal=False, + scope=api_subnet, + ), + ]) + self.assertEqual(add_role_assignment.call_count, 3) + self.assertEqual( + dec_6.context.get_intermediate("need_post_creation_vnet_permission_granting"), + False, + ) + + # BYO VNet for Managed System Pool with system-assigned identity defers all BYO subnets. + dec_7 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_managed_identity": True, + "sku": "automatic", + "system_node_subnet_id": system_subnet, + "node_subnet_id": node_subnet, + "apiserver_subnet_id": api_subnet, + "skip_subnet_role_assignment": False, + "assign_identity": None, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_7 = self.models.ManagedCluster(location="test_location") + dec_7.context.attach_mc(mc_7) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.subnet_role_assignment_exists", + return_value=False, + ): + dec_7.process_add_role_assignment_for_vnet_subnet(mc_7) + self.assertEqual( + dec_7.context.get_intermediate("need_post_creation_vnet_permission_granting"), + True, + ) + self.assertEqual( + dec_7.context.get_intermediate("byo_hosted_system_subnets_pending_grant"), + [system_subnet, node_subnet, api_subnet], + ) + def test_process_attach_acr(self): # default value in `aks_create` dec_1 = AKSManagedClusterCreateDecorator( @@ -8617,6 +8712,49 @@ def test_immediate_processing_after_request(self): is_service_principal=False, ) + dec_2 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + {}, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_2 = self.models.ManagedCluster(location="test_location") + dec_2.context.attach_mc(mc_2) + dec_2.context.set_intermediate("need_post_creation_vnet_permission_granting", True) + dec_2.context.set_intermediate( + "byo_hosted_system_subnets_pending_grant", + ["test_system_subnet_id", "test_node_subnet_id", "test_api_subnet_id"], + ) + self.client.get = Mock(return_value=Mock(identity=Mock(principal_id="test_principal_id"))) + with patch( + "azure.cli.command_modules.acs.managed_cluster_decorator.add_role_assignment", return_value=True + ) as mock_add: + dec_2.immediate_processing_after_request(mc_2) + mock_add.assert_has_calls([ + call( + self.cmd, + "Network Contributor", + "test_principal_id", + scope="test_system_subnet_id", + is_service_principal=False, + ), + call( + self.cmd, + "Network Contributor", + "test_principal_id", + scope="test_node_subnet_id", + is_service_principal=False, + ), + call( + self.cmd, + "Network Contributor", + "test_principal_id", + scope="test_api_subnet_id", + is_service_principal=False, + ), + ]) + self.assertEqual(mock_add.call_count, 3) + def test_postprocessing_after_mc_created(self): dec_1 = AKSManagedClusterCreateDecorator( self.cmd, From 75b0357ef62a89fb1202ef6578ba87f1082f3cf0 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 23:10:17 +0000 Subject: [PATCH 14/21] aks: preserve outbound type when updating hosted-system clusters Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 1 + .../latest/test_managed_cluster_decorator.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 75dd362e002..f963d522942 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2417,6 +2417,7 @@ def _get_outbound_type( skuName is not None and skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and isVnetSubnetIdEmpty and + not read_from_mc and not byo_subnets_set ): # outbound_type of Automatic SKU should be ManagedNATGateway if no subnet id provided. diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 28023dfc894..50be013d5c4 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -2122,6 +2122,25 @@ def test_get_outbound_type(self): expect_outbound_type_13 = CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY self.assertEqual(outbound_type_13,expect_outbound_type_13) + network_profile_14 = self.models.ContainerServiceNetworkProfile( + outbound_type=CONST_OUTBOUND_TYPE_LOAD_BALANCER + ) + mc_14 = self.models.ManagedCluster( + location="test_location", + network_profile=network_profile_14, + sku=self.models.ManagedClusterSKU(name="Automatic"), + ) + ctx_14 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({}), + self.models, + DecoratorMode.UPDATE, + ) + ctx_14.agentpool_context = mock.MagicMock() + ctx_14.agentpool_context.get_vnet_subnet_id.return_value = None + ctx_14.attach_mc(mc_14) + self.assertEqual(ctx_14.get_outbound_type(), CONST_OUTBOUND_TYPE_LOAD_BALANCER) + def test_get_network_plugin_mode(self): # default ctx_1 = AKSManagedClusterContext( From fb312fcc048ffcd7f9038deb8907ee57cc5303d3 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 23:12:20 +0000 Subject: [PATCH 15/21] aks: align BYO subnet outbound validation Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 9 +++-- .../latest/test_managed_cluster_decorator.py | 38 +++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index f963d522942..b3299d91f1b 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2447,21 +2447,22 @@ def _get_outbound_type( return outbound_type # basic sku lb doesn't support outbound type if outbound_type == CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING: - if self.get_vnet_subnet_id() in ["", None]: + if self.get_vnet_subnet_id() in ["", None] and not byo_subnets_set: raise RequiredArgumentMissingError( "--vnet-subnet-id must be specified for userDefinedRouting and it must " "be pre-configured with a route table with egress rules" ) if outbound_type == CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY: - if self.get_vnet_subnet_id() in ["", None]: + if self.get_vnet_subnet_id() in ["", None] and not byo_subnets_set: raise RequiredArgumentMissingError( "--vnet-subnet-id must be specified for userAssignedNATGateway and it must " "be pre-configured with a NAT gateway with outbound ips" ) if outbound_type == CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY: - if self.get_vnet_subnet_id() not in ["", None]: + if self.get_vnet_subnet_id() not in ["", None] or byo_subnets_set: raise InvalidArgumentValueError( - "--vnet-subnet-id cannot be specified for managedNATGateway" + "--vnet-subnet-id, --system-node-subnet-id and --node-subnet-id cannot be " + "specified for managedNATGateway" ) if outbound_type != CONST_OUTBOUND_TYPE_LOAD_BALANCER: if ( diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 50be013d5c4..c457e74d677 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -26,6 +26,7 @@ CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_OPEN_SERVICE_MESH_ADDON_NAME, CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING, + CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY, CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY, CONST_OUTBOUND_TYPE_LOAD_BALANCER, CONST_PRIVATE_DNS_ZONE_NONE, @@ -2141,6 +2142,43 @@ def test_get_outbound_type(self): ctx_14.attach_mc(mc_14) self.assertEqual(ctx_14.get_outbound_type(), CONST_OUTBOUND_TYPE_LOAD_BALANCER) + byo_params = { + "sku": "automatic", + "system_node_subnet_id": "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/sys", + "node_subnet_id": "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/node", + "apiserver_subnet_id": "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/api", + } + ctx_15 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({**byo_params, "outbound_type": CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY}), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx_15.agentpool_context = mock.MagicMock() + ctx_15.agentpool_context.get_vnet_subnet_id.return_value = None + self.assertEqual(ctx_15.get_outbound_type(), CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY) + + ctx_16 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({**byo_params, "outbound_type": CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING}), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx_16.agentpool_context = mock.MagicMock() + ctx_16.agentpool_context.get_vnet_subnet_id.return_value = None + self.assertEqual(ctx_16.get_outbound_type(), CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING) + + ctx_17 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({**byo_params, "outbound_type": CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY}), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx_17.agentpool_context = mock.MagicMock() + ctx_17.agentpool_context.get_vnet_subnet_id.return_value = None + with self.assertRaises(InvalidArgumentValueError): + ctx_17.get_outbound_type() + def test_get_network_plugin_mode(self): # default ctx_1 = AKSManagedClusterContext( From 0ed79d8bfdb453821819e0acd0934f0c01a2e4bf Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 23:36:28 +0000 Subject: [PATCH 16/21] aks: refine hosted-system agent pool handling Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 35 +++++++++----- .../latest/test_managed_cluster_decorator.py | 48 +++++++++++++++++++ 2 files changed, 70 insertions(+), 13 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index b3299d91f1b..768fa717cb1 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4302,9 +4302,12 @@ def get_enable_hosted_system(self) -> bool: if self.decorator_mode != DecoratorMode.CREATE: return False explicit = bool(self.raw_param.get("enable_hosted_system")) - implicit = bool( - self.raw_param.get("system_node_subnet_id") or - self.raw_param.get("node_subnet_id") + implicit = all( + [ + self.raw_param.get("system_node_subnet_id"), + self.raw_param.get("node_subnet_id"), + self.raw_param.get("apiserver_subnet_id"), + ] ) return explicit or implicit @@ -7193,6 +7196,19 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster mc.fqdn_subdomain = fqdn_subdomain return mc + def _remove_cli_synthesized_default_agent_pool(self, mc: ManagedCluster) -> None: + agent_pool_profiles = mc.agent_pool_profiles + if not agent_pool_profiles: + return + + # `set_up_agentpool_profile` always adds the first pool for `az aks create`. + # A Managed System Pool cluster gets that system pool from `hosted_system_profile`; + # preserve any additional pools that may have been appended by other create-time logic. + if len(agent_pool_profiles) == 1: + mc.agent_pool_profiles = None + else: + mc.agent_pool_profiles = agent_pool_profiles[1:] + def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. @@ -7202,11 +7218,9 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: - `mc.hosted_system_profile.enabled` is set to True so the RP treats this as a Managed System Pool request. - `system_node_subnet_id` / `node_subnet_id` are populated when supplied. - - `mc.agent_pool_profiles` is cleared. The CLI unconditionally synthesizes - a default agent pool via `set_up_agentpool_profile`; on a Managed System + - The CLI-synthesized default agent pool is removed. On a Managed System Pool cluster the system pool is provisioned server-side from - `hosted_system_profile`, so the CLI default is stale and (in the BYO case) - actively conflicts with the BYO VNet. + `hosted_system_profile`, so the CLI default is stale. :return: the ManagedCluster object """ @@ -7228,12 +7242,7 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id if node_subnet_id: mc.hosted_system_profile.node_subnet_id = node_subnet_id - # Clear the CLI-synthesized default agent pool — the RP provisions the - # system pool from hosted_system_profile instead. Leaving it in causes - # the RP to reject BYO VNet clusters and produces a ghost pool on - # non-BYO Automatic clusters. - if mc.agent_pool_profiles is not None: - mc.agent_pool_profiles = None + self._remove_cli_synthesized_default_agent_pool(mc) return mc def set_up_identity(self, mc: ManagedCluster) -> ManagedCluster: diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index c457e74d677..a92d624a2af 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -7102,6 +7102,54 @@ def test_process_attach_acr(self): dec_3.process_attach_acr(mc_3) ensure_assignment.assert_called_once_with(self.cmd, "test_service_principal", "test_registry_id", False, True, None) + def test_set_up_hosted_system_profile(self): + system_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/system" + node_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/node" + api_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/api" + + dec_1 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "sku": "automatic", + "system_node_subnet_id": system_subnet, + "node_subnet_id": node_subnet, + "apiserver_subnet_id": api_subnet, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_1 = self.models.ManagedCluster(location="test_location") + dec_1.context.attach_mc(mc_1) + cli_default_pool = self.models.ManagedClusterAgentPoolProfile(name="nodepool1") + mc_1.agent_pool_profiles = [cli_default_pool] + + dec_1.set_up_hosted_system_profile(mc_1) + + self.assertTrue(mc_1.hosted_system_profile.enabled) + self.assertEqual(mc_1.hosted_system_profile.system_node_subnet_id, system_subnet) + self.assertEqual(mc_1.hosted_system_profile.node_subnet_id, node_subnet) + self.assertIsNone(mc_1.agent_pool_profiles) + + dec_2 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "sku": "automatic", + "enable_hosted_system": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_2 = self.models.ManagedCluster(location="test_location") + dec_2.context.attach_mc(mc_2) + cli_default_pool = self.models.ManagedClusterAgentPoolProfile(name="nodepool1") + user_pool = self.models.ManagedClusterAgentPoolProfile(name="userpool") + mc_2.agent_pool_profiles = [cli_default_pool, user_pool] + + dec_2.set_up_hosted_system_profile(mc_2) + + self.assertTrue(mc_2.hosted_system_profile.enabled) + self.assertEqual(mc_2.agent_pool_profiles, [user_pool]) + def test_set_up_network_profile(self): # default value in `aks_create` dec_1 = AKSManagedClusterCreateDecorator( From 7fd3c1675e3174c8786c2f12b38aff40149f7280 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 23:41:57 +0000 Subject: [PATCH 17/21] aks: track CLI default pool before hosted-system removal Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 24 +++++++++++++++---- .../latest/test_managed_cluster_decorator.py | 21 ++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 768fa717cb1..e5bdeebdc83 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -6338,6 +6338,11 @@ def set_up_agentpool_profile(self, mc: ManagedCluster) -> ManagedCluster: agentpool_profile = self.agentpool_decorator.construct_agentpool_profile_default() mc.agent_pool_profiles = [agentpool_profile] + self.context.set_intermediate( + "cli_synthesized_default_agent_pool", + agentpool_profile, + overwrite_exists=True, + ) return mc def set_up_mc_properties(self, mc: ManagedCluster) -> ManagedCluster: @@ -7201,13 +7206,22 @@ def _remove_cli_synthesized_default_agent_pool(self, mc: ManagedCluster) -> None if not agent_pool_profiles: return - # `set_up_agentpool_profile` always adds the first pool for `az aks create`. + cli_default_agent_pool = self.context.get_intermediate( + "cli_synthesized_default_agent_pool", default_value=None + ) + if cli_default_agent_pool is None: + return + + # Remove only the exact pool object created by `set_up_agentpool_profile`. # A Managed System Pool cluster gets that system pool from `hosted_system_profile`; # preserve any additional pools that may have been appended by other create-time logic. - if len(agent_pool_profiles) == 1: - mc.agent_pool_profiles = None - else: - mc.agent_pool_profiles = agent_pool_profiles[1:] + remaining_agent_pool_profiles = [ + profile for profile in agent_pool_profiles + if profile is not cli_default_agent_pool + ] + if len(remaining_agent_pool_profiles) == len(agent_pool_profiles): + return + mc.agent_pool_profiles = remaining_agent_pool_profiles or None def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index a92d624a2af..8daf3d8a121 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -7122,6 +7122,7 @@ def test_set_up_hosted_system_profile(self): dec_1.context.attach_mc(mc_1) cli_default_pool = self.models.ManagedClusterAgentPoolProfile(name="nodepool1") mc_1.agent_pool_profiles = [cli_default_pool] + dec_1.context.set_intermediate("cli_synthesized_default_agent_pool", cli_default_pool) dec_1.set_up_hosted_system_profile(mc_1) @@ -7144,12 +7145,32 @@ def test_set_up_hosted_system_profile(self): cli_default_pool = self.models.ManagedClusterAgentPoolProfile(name="nodepool1") user_pool = self.models.ManagedClusterAgentPoolProfile(name="userpool") mc_2.agent_pool_profiles = [cli_default_pool, user_pool] + dec_2.context.set_intermediate("cli_synthesized_default_agent_pool", cli_default_pool) dec_2.set_up_hosted_system_profile(mc_2) self.assertTrue(mc_2.hosted_system_profile.enabled) self.assertEqual(mc_2.agent_pool_profiles, [user_pool]) + dec_3 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "sku": "automatic", + "enable_hosted_system": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_3 = self.models.ManagedCluster(location="test_location") + dec_3.context.attach_mc(mc_3) + user_pool = self.models.ManagedClusterAgentPoolProfile(name="userpool") + mc_3.agent_pool_profiles = [user_pool] + + dec_3.set_up_hosted_system_profile(mc_3) + + self.assertTrue(mc_3.hosted_system_profile.enabled) + self.assertEqual(mc_3.agent_pool_profiles, [user_pool]) + def test_set_up_network_profile(self): # default value in `aks_create` dec_1 = AKSManagedClusterCreateDecorator( From 717c22f5fdf9eb08bcc1cb66bff54f2f06c5541c Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 23:44:21 +0000 Subject: [PATCH 18/21] aks: skip default agent pool for hosted-system create Signed-off-by: wenhug <50309350+wenhug@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 36 +++------------- .../latest/test_managed_cluster_decorator.py | 42 ++++++++----------- 2 files changed, 22 insertions(+), 56 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index e5bdeebdc83..3460f8be95a 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -6336,13 +6336,11 @@ def set_up_agentpool_profile(self, mc: ManagedCluster) -> ManagedCluster: """ self._ensure_mc(mc) + if self.context.get_enable_hosted_system(): + return mc + agentpool_profile = self.agentpool_decorator.construct_agentpool_profile_default() mc.agent_pool_profiles = [agentpool_profile] - self.context.set_intermediate( - "cli_synthesized_default_agent_pool", - agentpool_profile, - overwrite_exists=True, - ) return mc def set_up_mc_properties(self, mc: ManagedCluster) -> ManagedCluster: @@ -7201,28 +7199,6 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster mc.fqdn_subdomain = fqdn_subdomain return mc - def _remove_cli_synthesized_default_agent_pool(self, mc: ManagedCluster) -> None: - agent_pool_profiles = mc.agent_pool_profiles - if not agent_pool_profiles: - return - - cli_default_agent_pool = self.context.get_intermediate( - "cli_synthesized_default_agent_pool", default_value=None - ) - if cli_default_agent_pool is None: - return - - # Remove only the exact pool object created by `set_up_agentpool_profile`. - # A Managed System Pool cluster gets that system pool from `hosted_system_profile`; - # preserve any additional pools that may have been appended by other create-time logic. - remaining_agent_pool_profiles = [ - profile for profile in agent_pool_profiles - if profile is not cli_default_agent_pool - ] - if len(remaining_agent_pool_profiles) == len(agent_pool_profiles): - return - mc.agent_pool_profiles = remaining_agent_pool_profiles or None - def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: """Set up hosted_system_profile on the ManagedCluster for Automatic SKU clusters. @@ -7232,9 +7208,8 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: - `mc.hosted_system_profile.enabled` is set to True so the RP treats this as a Managed System Pool request. - `system_node_subnet_id` / `node_subnet_id` are populated when supplied. - - The CLI-synthesized default agent pool is removed. On a Managed System - Pool cluster the system pool is provisioned server-side from - `hosted_system_profile`, so the CLI default is stale. + - `set_up_agentpool_profile` does not synthesize the default agent pool, + because the system pool is provisioned server-side from `hosted_system_profile`. :return: the ManagedCluster object """ @@ -7256,7 +7231,6 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: mc.hosted_system_profile.system_node_subnet_id = system_node_subnet_id if node_subnet_id: mc.hosted_system_profile.node_subnet_id = node_subnet_id - self._remove_cli_synthesized_default_agent_pool(mc) return mc def set_up_identity(self, mc: ManagedCluster) -> ManagedCluster: diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 8daf3d8a121..378b3532a67 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -6541,6 +6541,22 @@ def test_set_up_agentpool_profile(self): ground_truth_mc_1.agent_pool_profiles = [ground_truth_agentpool_profile_1] self.assertEqual(dec_mc_1, ground_truth_mc_1) + # Managed System Pool clusters get their system pool from hosted_system_profile, + # so the CLI should not synthesize a default agent pool up front. + dec_2 = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "sku": "automatic", + "enable_hosted_system": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc_2 = self.models.ManagedCluster(location="test_location") + dec_2.context.attach_mc(mc_2) + dec_mc_2 = dec_2.set_up_agentpool_profile(mc_2) + self.assertIsNone(dec_mc_2.agent_pool_profiles) + def test_set_up_mc_properties(self): dec_1 = AKSManagedClusterCreateDecorator( self.cmd, @@ -7120,9 +7136,6 @@ def test_set_up_hosted_system_profile(self): ) mc_1 = self.models.ManagedCluster(location="test_location") dec_1.context.attach_mc(mc_1) - cli_default_pool = self.models.ManagedClusterAgentPoolProfile(name="nodepool1") - mc_1.agent_pool_profiles = [cli_default_pool] - dec_1.context.set_intermediate("cli_synthesized_default_agent_pool", cli_default_pool) dec_1.set_up_hosted_system_profile(mc_1) @@ -7142,35 +7155,14 @@ def test_set_up_hosted_system_profile(self): ) mc_2 = self.models.ManagedCluster(location="test_location") dec_2.context.attach_mc(mc_2) - cli_default_pool = self.models.ManagedClusterAgentPoolProfile(name="nodepool1") user_pool = self.models.ManagedClusterAgentPoolProfile(name="userpool") - mc_2.agent_pool_profiles = [cli_default_pool, user_pool] - dec_2.context.set_intermediate("cli_synthesized_default_agent_pool", cli_default_pool) + mc_2.agent_pool_profiles = [user_pool] dec_2.set_up_hosted_system_profile(mc_2) self.assertTrue(mc_2.hosted_system_profile.enabled) self.assertEqual(mc_2.agent_pool_profiles, [user_pool]) - dec_3 = AKSManagedClusterCreateDecorator( - self.cmd, - self.client, - { - "sku": "automatic", - "enable_hosted_system": True, - }, - ResourceType.MGMT_CONTAINERSERVICE, - ) - mc_3 = self.models.ManagedCluster(location="test_location") - dec_3.context.attach_mc(mc_3) - user_pool = self.models.ManagedClusterAgentPoolProfile(name="userpool") - mc_3.agent_pool_profiles = [user_pool] - - dec_3.set_up_hosted_system_profile(mc_3) - - self.assertTrue(mc_3.hosted_system_profile.enabled) - self.assertEqual(mc_3.agent_pool_profiles, [user_pool]) - def test_set_up_network_profile(self): # default value in `aks_create` dec_1 = AKSManagedClusterCreateDecorator( From b9553b7a29240277d3d71f164036bddb748409f9 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Fri, 24 Apr 2026 23:48:43 +0000 Subject: [PATCH 19/21] aks: remove hosted system hobo wording --- .../command_modules/acs/managed_cluster_decorator.py | 10 +++++----- .../tests/latest/test_managed_cluster_decorator.py | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 3460f8be95a..dc289c56c4e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -4295,7 +4295,7 @@ def get_enable_hosted_system(self) -> bool: """Obtain the value of enable_hosted_system. Returns True when the user explicitly opts in via --enable-hosted-system, - or implicitly via the BYO VNet subnet trio (which is HOBO-only). + or implicitly via the BYO VNet subnet trio for Managed System Pool. :return: bool """ @@ -4311,7 +4311,7 @@ def get_enable_hosted_system(self) -> bool: ) return explicit or implicit - def validate_byo_hobo_subnets(self) -> None: + def validate_byo_hosted_system_subnets(self) -> None: """Validate the BYO VNet subnet trio and the --enable-hosted-system flag. BYO VNet for a Managed System Pool is triggered by --system-node-subnet-id / @@ -6493,7 +6493,7 @@ def process_add_role_assignment_for_vnet_subnet(self, mc: ManagedCluster) -> Non # Validate before granting roles so a malformed BYO trio does not leave # partial Network Contributor assignments behind. - self.context.validate_byo_hobo_subnets() + self.context.validate_byo_hosted_system_subnets() need_post_creation_vnet_permission_granting = False vnet_subnet_id = self.context.get_vnet_subnet_id() @@ -7162,7 +7162,7 @@ def set_up_api_server_access_profile(self, mc: ManagedCluster) -> ManagedCluster # Run BYO VNet trio validation first so clearer errors surface before the # generic --apiserver-subnet-id checks inside _get_apiserver_subnet_id. - self.context.validate_byo_hobo_subnets() + self.context.validate_byo_hosted_system_subnets() api_server_access_profile = None api_server_authorized_ip_ranges = self.context.get_api_server_authorized_ip_ranges() @@ -7216,7 +7216,7 @@ def set_up_hosted_system_profile(self, mc: ManagedCluster) -> ManagedCluster: self._ensure_mc(mc) # Run cross-flag validation (--enable-hosted-system SKU gate + BYO trio completeness) - self.context.validate_byo_hobo_subnets() + self.context.validate_byo_hosted_system_subnets() system_node_subnet_id = self.context.get_system_node_subnet_id() node_subnet_id = self.context.get_node_subnet_id() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 378b3532a67..3bc6f3c123d 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -4483,7 +4483,7 @@ def test_get_apiserver_subnet_id(self): with self.assertRaises(RequiredArgumentMissingError): ctx_6.get_apiserver_subnet_id() - def test_byo_hobo_subnets_validation(self): + def test_byo_hosted_system_subnets_validation(self): system_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/sys" node_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/nod" api_subnet = "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/api" @@ -4499,7 +4499,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(RequiredArgumentMissingError): - ctx.validate_byo_hobo_subnets() + ctx.validate_byo_hosted_system_subnets() # trio without --sku automatic -> RequiredArgumentMissingError ctx = AKSManagedClusterContext( @@ -4514,7 +4514,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(RequiredArgumentMissingError): - ctx.validate_byo_hobo_subnets() + ctx.validate_byo_hosted_system_subnets() # happy path: full trio + automatic ctx = AKSManagedClusterContext( @@ -4528,7 +4528,7 @@ def test_byo_hobo_subnets_validation(self): self.models, decorator_mode=DecoratorMode.CREATE, ) - ctx.validate_byo_hobo_subnets() + ctx.validate_byo_hosted_system_subnets() self.assertEqual(ctx.get_system_node_subnet_id(), system_subnet) self.assertEqual(ctx.get_node_subnet_id(), node_subnet) self.assertTrue(ctx.get_enable_hosted_system()) # BYO trio implies enable_hosted_system @@ -4544,7 +4544,7 @@ def test_byo_hobo_subnets_validation(self): decorator_mode=DecoratorMode.CREATE, ) with self.assertRaises(RequiredArgumentMissingError): - ctx.validate_byo_hobo_subnets() + ctx.validate_byo_hosted_system_subnets() # happy path: --enable-hosted-system alone on automatic ctx = AKSManagedClusterContext( @@ -4556,7 +4556,7 @@ def test_byo_hobo_subnets_validation(self): self.models, decorator_mode=DecoratorMode.CREATE, ) - ctx.validate_byo_hobo_subnets() + ctx.validate_byo_hosted_system_subnets() self.assertTrue(ctx.get_enable_hosted_system()) def test_get_private_dns_zone(self): From 4417e80f9365179457e7caff4c71c14b58af09d1 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Sat, 25 Apr 2026 01:32:05 +0000 Subject: [PATCH 20/21] aks: clarify automatic outbound subnet errors --- .../acs/managed_cluster_decorator.py | 17 +++++++++- .../latest/test_managed_cluster_decorator.py | 34 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index dc289c56c4e..5a427e94a19 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2418,7 +2418,8 @@ def _get_outbound_type( skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and isVnetSubnetIdEmpty and not read_from_mc and - not byo_subnets_set + not byo_subnets_set and + outbound_type == CONST_OUTBOUND_TYPE_LOAD_BALANCER ): # outbound_type of Automatic SKU should be ManagedNATGateway if no subnet id provided. outbound_type = CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY @@ -2448,12 +2449,26 @@ def _get_outbound_type( if outbound_type == CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING: if self.get_vnet_subnet_id() in ["", None] and not byo_subnets_set: + if skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: + raise RequiredArgumentMissingError( + "--vnet-subnet-id must be specified for userDefinedRouting. For an Automatic cluster " + "using Managed System Pool BYO VNet, specify --system-node-subnet-id, --node-subnet-id " + "and --apiserver-subnet-id instead. The subnet must be pre-configured with a route " + "table with egress rules" + ) raise RequiredArgumentMissingError( "--vnet-subnet-id must be specified for userDefinedRouting and it must " "be pre-configured with a route table with egress rules" ) if outbound_type == CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY: if self.get_vnet_subnet_id() in ["", None] and not byo_subnets_set: + if skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: + raise RequiredArgumentMissingError( + "--vnet-subnet-id must be specified for userAssignedNATGateway. For an Automatic cluster " + "using Managed System Pool BYO VNet, specify --system-node-subnet-id, --node-subnet-id " + "and --apiserver-subnet-id instead. The subnet must be pre-configured with a NAT gateway " + "with outbound ips" + ) raise RequiredArgumentMissingError( "--vnet-subnet-id must be specified for userAssignedNATGateway and it must " "be pre-configured with a NAT gateway with outbound ips" diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 3bc6f3c123d..b34f3279ce9 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -2142,6 +2142,40 @@ def test_get_outbound_type(self): ctx_14.attach_mc(mc_14) self.assertEqual(ctx_14.get_outbound_type(), CONST_OUTBOUND_TYPE_LOAD_BALANCER) + ctx_14_1 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "outbound_type": CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx_14_1.agentpool_context = mock.MagicMock() + ctx_14_1.agentpool_context.get_vnet_subnet_id.return_value = None + with self.assertRaisesRegex( + RequiredArgumentMissingError, + "--system-node-subnet-id, --node-subnet-id and --apiserver-subnet-id", + ): + ctx_14_1.get_outbound_type() + + ctx_14_2 = AKSManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "sku": "automatic", + "outbound_type": CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + ctx_14_2.agentpool_context = mock.MagicMock() + ctx_14_2.agentpool_context.get_vnet_subnet_id.return_value = None + with self.assertRaisesRegex( + RequiredArgumentMissingError, + "--system-node-subnet-id, --node-subnet-id and --apiserver-subnet-id", + ): + ctx_14_2.get_outbound_type() + byo_params = { "sku": "automatic", "system_node_subnet_id": "/subscriptions/s/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/sys", From 234b3fb234c226f90843b9efc2c9350d48fb44a6 Mon Sep 17 00:00:00 2001 From: wenhug <50309350+wenhug@users.noreply.github.com> Date: Mon, 27 Apr 2026 07:28:13 +0000 Subject: [PATCH 21/21] aks: reduce outbound type validation complexity --- .../acs/managed_cluster_decorator.py | 62 ++++++++++--------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 5a427e94a19..cb16ef969df 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -2362,6 +2362,30 @@ def get_sku_name(self) -> str: skuName = CONST_MANAGED_CLUSTER_SKU_NAME_BASE return skuName + @staticmethod + def _raise_missing_vnet_subnet_for_outbound_type(outbound_type: str, sku_name: str) -> None: + if outbound_type == CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING: + subnet_requirement = "a route table with egress rules" + else: + subnet_requirement = "a NAT gateway with outbound ips" + + if sku_name == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: + raise RequiredArgumentMissingError( + "--vnet-subnet-id must be specified for {outbound_type}. For an Automatic cluster " + "using Managed System Pool BYO VNet, specify --system-node-subnet-id, --node-subnet-id " + "and --apiserver-subnet-id instead. The subnet must be pre-configured with {requirement}".format( + outbound_type=outbound_type, + requirement=subnet_requirement, + ) + ) + raise RequiredArgumentMissingError( + "--vnet-subnet-id must be specified for {outbound_type} and it must " + "be pre-configured with {requirement}".format( + outbound_type=outbound_type, + requirement=subnet_requirement, + ) + ) + def _get_outbound_type( self, enable_validation: bool = False, @@ -2413,14 +2437,14 @@ def _get_outbound_type( self.raw_param.get("system_node_subnet_id") or self.raw_param.get("node_subnet_id") ) - if ( + use_automatic_managed_nat_gateway = ( skuName is not None and skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC and isVnetSubnetIdEmpty and not read_from_mc and - not byo_subnets_set and - outbound_type == CONST_OUTBOUND_TYPE_LOAD_BALANCER - ): + not byo_subnets_set + ) + if use_automatic_managed_nat_gateway and outbound_type == CONST_OUTBOUND_TYPE_LOAD_BALANCER: # outbound_type of Automatic SKU should be ManagedNATGateway if no subnet id provided. outbound_type = CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY @@ -2447,32 +2471,12 @@ def _get_outbound_type( ) return outbound_type # basic sku lb doesn't support outbound type - if outbound_type == CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING: - if self.get_vnet_subnet_id() in ["", None] and not byo_subnets_set: - if skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: - raise RequiredArgumentMissingError( - "--vnet-subnet-id must be specified for userDefinedRouting. For an Automatic cluster " - "using Managed System Pool BYO VNet, specify --system-node-subnet-id, --node-subnet-id " - "and --apiserver-subnet-id instead. The subnet must be pre-configured with a route " - "table with egress rules" - ) - raise RequiredArgumentMissingError( - "--vnet-subnet-id must be specified for userDefinedRouting and it must " - "be pre-configured with a route table with egress rules" - ) - if outbound_type == CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY: + if outbound_type in [ + CONST_OUTBOUND_TYPE_USER_DEFINED_ROUTING, + CONST_OUTBOUND_TYPE_USER_ASSIGNED_NAT_GATEWAY, + ]: if self.get_vnet_subnet_id() in ["", None] and not byo_subnets_set: - if skuName == CONST_MANAGED_CLUSTER_SKU_NAME_AUTOMATIC: - raise RequiredArgumentMissingError( - "--vnet-subnet-id must be specified for userAssignedNATGateway. For an Automatic cluster " - "using Managed System Pool BYO VNet, specify --system-node-subnet-id, --node-subnet-id " - "and --apiserver-subnet-id instead. The subnet must be pre-configured with a NAT gateway " - "with outbound ips" - ) - raise RequiredArgumentMissingError( - "--vnet-subnet-id must be specified for userAssignedNATGateway and it must " - "be pre-configured with a NAT gateway with outbound ips" - ) + self._raise_missing_vnet_subnet_for_outbound_type(outbound_type, skuName) if outbound_type == CONST_OUTBOUND_TYPE_MANAGED_NAT_GATEWAY: if self.get_vnet_subnet_id() not in ["", None] or byo_subnets_set: raise InvalidArgumentValueError(