From b88da5919511f97aa587d00d8c436e80af92bf60 Mon Sep 17 00:00:00 2001
From: "Kaveesh Dubey (from Dev Box)"
Date: Wed, 15 Apr 2026 15:48:44 -0700
Subject: [PATCH] [AKS] Add --enable-control-plane-metrics and --disable-control-plane-metrics for Managed Prometheus

Add CLI support for the ManagedClusterAzureMonitorProfileMetricsControlPlane
feature, which enables collection of control plane component metrics
(kube-apiserver, etcd, etc.) via the Azure Managed Prometheus addon.

Changes:
- Add --enable-control-plane-metrics param for aks create and update
- Add --disable-control-plane-metrics param for aks create and update
- Add getter/validator methods with mutual exclusivity checks
- Add validation: control plane metrics flags require
  --enable-azure-monitor-metrics or that Azure Monitor Metrics is already
  enabled on the cluster
- Reject --enable-control-plane-metrics combined with
  --disable-azure-monitor-metrics
- Wire control_plane into set_up/update_azure_monitor_profile decorators
  (both paths read the flags through the validating getters)
- Add help text for all new parameters

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../azure/cli/command_modules/acs/_help.py    |  12 ++
 .../azure/cli/command_modules/acs/_params.py  |   4 +
 .../azure/cli/command_modules/acs/custom.py   |   4 +
 .../acs/managed_cluster_decorator.py          | 119 ++++++++++++++++++
 4 files changed, 139 insertions(+)

diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py
index 7a89268ed9f..1d8078d3f8c 100644
--- a/src/azure-cli/azure/cli/command_modules/acs/_help.py
+++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py
@@ -534,6 +534,12 @@
   - name: --enable-windows-recording-rules
     type: bool
     short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon
+  - name: --enable-control-plane-metrics
+    type: bool
+    short-summary: Enable collection of control plane metrics for the Azure Managed Prometheus addon. Configures collection of operational runtime metrics from managed control plane components (kube-apiserver, etcd, etc). See aka.ms/aks/controlplane-metrics for details. Must be used with --enable-azure-monitor-metrics.
+  - name: --disable-control-plane-metrics
+    type: bool
+    short-summary: Disable collection of control plane metrics for the Azure Managed Prometheus addon.
   - name: --nodepool-taints
     type: string
     short-summary: The node taints for all node pool.
@@ -1063,6 +1069,12 @@
   - name: --disable-azure-monitor-metrics
     type: bool
     short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster.
+  - name: --enable-control-plane-metrics
+    type: bool
+    short-summary: Enable collection of control plane metrics for the Azure Managed Prometheus addon. Configures collection of operational runtime metrics from managed control plane components (kube-apiserver, etcd, etc). See aka.ms/aks/controlplane-metrics for details.
+  - name: --disable-control-plane-metrics
+    type: bool
+    short-summary: Disable collection of control plane metrics for the Azure Managed Prometheus addon.
   - name: --nodepool-taints
     type: string
     short-summary: The node taints for all node pool.
diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py
index 49e2d17f585..3e3db521aa6 100644
--- a/src/azure-cli/azure/cli/command_modules/acs/_params.py
+++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py
@@ -568,6 +568,8 @@ def load_arguments(self, _):
         c.argument('ksm_metric_annotations_allow_list')
         c.argument('grafana_resource_id', validator=validate_grafanaresourceid)
         c.argument('enable_windows_recording_rules', action='store_true')
+        c.argument('enable_control_plane_metrics', action='store_true')
+        c.argument('disable_control_plane_metrics', action='store_true')
         c.argument('node_public_ip_tags', arg_type=tags_type, validator=validate_node_public_ip_tags,
                    help='space-separated tags: key[=value] [key[=value] ...].')
         # azure container storage
@@ -795,6 +797,8 @@ def load_arguments(self, _):
         c.argument('grafana_resource_id', validator=validate_grafanaresourceid)
         c.argument('enable_windows_recording_rules', action='store_true')
         c.argument('disable_azure_monitor_metrics', action='store_true')
+        c.argument('enable_control_plane_metrics', action='store_true')
+        c.argument('disable_control_plane_metrics', action='store_true')
         # azure container storage
         c.argument(
             "enable_azure_container_storage",
diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py
index b0d7ece1360..c8ae7ecf6b0 100644
--- a/src/azure-cli/azure/cli/command_modules/acs/custom.py
+++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py
@@ -1008,6 +1008,8 @@ def aks_create(
     ksm_metric_annotations_allow_list=None,
     grafana_resource_id=None,
     enable_windows_recording_rules=False,
+    enable_control_plane_metrics=False,
+    disable_control_plane_metrics=False,
     # azure container storage
     enable_azure_container_storage=None,
     container_storage_version=None,
@@ -1200,6 +1202,8 @@ def aks_update(
     grafana_resource_id=None,
     enable_windows_recording_rules=False,
     disable_azure_monitor_metrics=False,
+    enable_control_plane_metrics=False,
+    disable_control_plane_metrics=False,
     # azure container storage
     enable_azure_container_storage=None,
     disable_azure_container_storage=None,
diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py
index b24a7262f56..20df63ef26b 100644
--- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py
+++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py
@@ -5762,6 +5762,107 @@ def get_disable_azure_monitor_metrics(self) -> bool:
         """
         return self._get_disable_azure_monitor_metrics(enable_validation=True)
 
+    def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> bool:
+        """Internal function to obtain the value of enable_control_plane_metrics.
+
+        When validation is enabled, the flag is rejected if it is combined with
+        --disable-control-plane-metrics or --disable-azure-monitor-metrics, or if Azure Monitor
+        Metrics is neither being enabled by this command nor (in update mode) already enabled
+        on the cluster.
+
+        :return: bool
+        """
+        enable_control_plane_metrics = self.raw_param.get("enable_control_plane_metrics")
+        if enable_validation:
+            if enable_control_plane_metrics and self._get_disable_control_plane_metrics(False):
+                raise MutuallyExclusiveArgumentError(
+                    "Cannot specify --enable-control-plane-metrics and "
+                    "--disable-control-plane-metrics at the same time"
+                )
+            # control_plane lives under the metrics profile; enabling it while the same command
+            # disables that profile would be contradictory
+            if enable_control_plane_metrics and self.raw_param.get("disable_azure_monitor_metrics"):
+                raise MutuallyExclusiveArgumentError(
+                    "Cannot specify --enable-control-plane-metrics and "
+                    "--disable-azure-monitor-metrics at the same time"
+                )
+            if enable_control_plane_metrics:
+                # In create mode, --enable-azure-monitor-metrics must be specified
+                if self.decorator_mode == DecoratorMode.CREATE:
+                    if not self._get_enable_azure_monitor_metrics(False):
+                        raise RequiredArgumentMissingError(
+                            "--enable-control-plane-metrics cannot be used as a standalone flag. "
+                            "This flag must be used in conjunction with --enable-azure-monitor-metrics "
+                            "to enable control plane metrics collection. "
+                            "Usage: az aks create --enable-azure-monitor-metrics --name "
+                            "--resource-group --enable-control-plane-metrics"
+                        )
+                # In update mode, azure monitor metrics must already be enabled on the cluster or being enabled now
+                if self.decorator_mode == DecoratorMode.UPDATE:
+                    is_metrics_enabled = (
+                        self.mc and
+                        hasattr(self.mc, "azure_monitor_profile") and
+                        self.mc.azure_monitor_profile and
+                        self.mc.azure_monitor_profile.metrics and
+                        getattr(self.mc.azure_monitor_profile.metrics, "enabled", False)
+                    )
+                    if not self._get_enable_azure_monitor_metrics(False) and not is_metrics_enabled:
+                        raise RequiredArgumentMissingError(
+                            "--enable-control-plane-metrics cannot be used as a standalone flag. "
+                            "This flag must be used in conjunction with --enable-azure-monitor-metrics "
+                            "to enable control plane metrics collection, or Azure Monitor Metrics must "
+                            "already be enabled on the cluster. "
+                            "Usage: az aks update --enable-azure-monitor-metrics --name "
+                            "--resource-group --enable-control-plane-metrics"
+                        )
+        return enable_control_plane_metrics
+
+    def get_enable_control_plane_metrics(self) -> bool:
+        """Obtain the value of enable_control_plane_metrics.
+
+        :return: bool
+        """
+        return self._get_enable_control_plane_metrics(enable_validation=True)
+
+    def _get_disable_control_plane_metrics(self, enable_validation: bool = False) -> bool:
+        """Internal function to obtain the value of disable_control_plane_metrics.
+
+        When validation is enabled, the flag is rejected if it is combined with
+        --enable-control-plane-metrics, or if Azure Monitor Metrics is neither being enabled by
+        this command nor already enabled on the attached cluster object.
+
+        :return: bool
+        """
+        disable_control_plane_metrics = self.raw_param.get("disable_control_plane_metrics")
+        if enable_validation:
+            if disable_control_plane_metrics and self._get_enable_control_plane_metrics(False):
+                raise MutuallyExclusiveArgumentError(
+                    "Cannot specify --enable-control-plane-metrics and "
+                    "--disable-control-plane-metrics at the same time"
+                )
+            if disable_control_plane_metrics:
+                # Control plane metrics can only be disabled if azure monitor metrics is enabled
+                is_metrics_enabled = (
+                    self.mc and
+                    hasattr(self.mc, "azure_monitor_profile") and
+                    self.mc.azure_monitor_profile and
+                    self.mc.azure_monitor_profile.metrics and
+                    getattr(self.mc.azure_monitor_profile.metrics, "enabled", False)
+                )
+                if not self._get_enable_azure_monitor_metrics(False) and not is_metrics_enabled:
+                    raise RequiredArgumentMissingError(
+                        "--disable-control-plane-metrics requires Azure Monitor Metrics to be enabled "
+                        "on the cluster. Enable it first with --enable-azure-monitor-metrics."
+                    )
+        return disable_control_plane_metrics
+
+    def get_disable_control_plane_metrics(self) -> bool:
+        """Obtain the value of disable_control_plane_metrics.
+
+        :return: bool
+        """
+        return self._get_disable_control_plane_metrics(enable_validation=True)
+
     def _get_enable_vpa(self, enable_validation: bool = False) -> bool:
         """Internal function to obtain the value of enable_vpa.
         This function supports the option of enable_vpa. When enabled, if both enable_vpa and enable_vpa are
@@ -7328,6 +7429,13 @@ def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster:
             mc.azure_monitor_profile.metrics.kube_state_metrics = self.models.ManagedClusterAzureMonitorProfileKubeStateMetrics(  # pylint:disable=line-too-long
                 metric_labels_allowlist=str(ksm_metric_labels_allow_list),
                 metric_annotations_allow_list=str(ksm_metric_annotations_allow_list))
+            # set up control plane metrics if requested; the getters validate the flag combination
+            enable_control_plane_metrics = self.context.get_enable_control_plane_metrics()
+            disable_control_plane_metrics = self.context.get_disable_control_plane_metrics()
+            if enable_control_plane_metrics:
+                mc.azure_monitor_profile.metrics.control_plane = self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=True)  # pylint:disable=line-too-long
+            elif disable_control_plane_metrics:
+                mc.azure_monitor_profile.metrics.control_plane = self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=False)  # pylint:disable=line-too-long
             # set intermediate
             self.context.set_intermediate("azuremonitormetrics_addon_enabled", True, overwrite_exists=True)
         return mc
@@ -9269,6 +9377,17 @@ def update_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster:
                 mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile()
             mc.azure_monitor_profile.metrics = self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=False)
 
+        # handle control plane metrics enable/disable independently
+        enable_control_plane_metrics = self.context.get_enable_control_plane_metrics()
+        disable_control_plane_metrics = self.context.get_disable_control_plane_metrics()
+        if enable_control_plane_metrics or disable_control_plane_metrics:
+            if mc.azure_monitor_profile is None:
+                mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile()
+            if mc.azure_monitor_profile.metrics is None:
+                mc.azure_monitor_profile.metrics = self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True)
+            mc.azure_monitor_profile.metrics.control_plane = self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(  # pylint:disable=line-too-long
+                enabled=bool(enable_control_plane_metrics))
+
         if (
             self.context.raw_param.get("enable_azure_monitor_metrics") or
             self.context.raw_param.get("disable_azure_monitor_metrics")