From 6002cd2a9ccd40ddf7224ad089b02c7f2596de7a Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 10 Mar 2026 15:45:13 +0000 Subject: [PATCH 01/32] Fix update enable cnl hlsm --- .../managed_cluster_decorator.py | 19 +- .../latest/test_managed_cluster_decorator.py | 631 ++++++++++++++++++ 2 files changed, 647 insertions(+), 3 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index d0ccbfb0389..45c9630b0c8 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -5305,8 +5305,21 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: cluster, cluster_resource_id, self.cmd ) elif (self.context.raw_param.get("enable_addons") is not None or - self.context.raw_param.get("enable-azure-monitor-logs") is not None): - # Create the DCR Association here + self.context.raw_param.get("enable-azure-monitor-logs") is not None or + self.context.raw_param.get("enable_container_network_logs") is not None or + self.context.raw_param.get("enable_retina_flow_logs") is not None or + self.context.raw_param.get("disable_container_network_logs") is not None or + self.context.raw_param.get("disable_retina_flow_logs") is not None or + self.context.raw_param.get("enable_high_log_scale_mode") is not None): + # Create/update the DCR when CNL or HLSM flags change so that the DCR streams + # (e.g. Microsoft-ContainerLogV2-HighScale) are kept in sync. + cnl_or_hlsm_changing = ( + self.context.raw_param.get("enable_container_network_logs") is not None or + self.context.raw_param.get("enable_retina_flow_logs") is not None or + self.context.raw_param.get("disable_container_network_logs") is not None or + self.context.raw_param.get("disable_retina_flow_logs") is not None or + self.context.raw_param.get("enable_high_log_scale_mode") is not None + ) addon_consts = self.context.get_addon_consts() CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") self.context.external_functions.ensure_container_insights_for_monitoring( @@ -5318,7 +5331,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.get_location(), remove_monitoring=False, aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=False, + create_dcr=cnl_or_hlsm_changing, create_dcra=True, enable_syslog=self.context.get_enable_syslog(), data_collection_settings=self.context.get_data_collection_settings(), diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index f84639fab49..fbc7bd1248c 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -7909,6 +7909,198 @@ def test_get_enable_azure_monitor_logs_create_mode_succeeds(self): result = ctx_1.get_enable_azure_monitor_logs() self.assertTrue(result) + # ------------------------------------------------------------------ + # Tests for postprocessing_after_mc_created: + # cnl_or_hlsm_changing flag logic → create_dcr passed to + # ensure_container_insights_for_monitoring + # ------------------------------------------------------------------ + def _make_postprocessing_decorator(self, extra_raw_params): + """Helper: build a minimal Create decorator ready for postprocessing tests.""" + raw_params = { + "name": "test_name", + "resource_group_name": "test_rg_name", + "location": "test_location", + "enable_msi_auth_for_monitoring": True, + "enable_syslog": False, + "data_collection_settings": None, + } + raw_params.update(extra_raw_params) + dec = AKSPreviewManagedClusterCreateDecorator( + self.cmd, + self.client, + raw_params, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster(location="test_location") + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test_subscription_id") + dec.context.set_intermediate("monitoring_addon_enabled", True) + return dec + + def _make_cluster_with_monitoring(self): + """Helper: build a minimal cluster object that has the monitoring addon profile.""" + return self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True) + }, + ) + + def test_postprocessing_create_dcr_false_when_only_enable_addons(self): + """create_dcr=False when only enable_addons triggers ensure_container_insights_for_monitoring. + + enable_addons is in the outer condition but NOT in cnl_or_hlsm_changing, + so create_dcr must be False. + """ + dec = self._make_postprocessing_decorator({"enable_addons": "monitoring"}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertFalse(kwargs["create_dcr"]) + + def test_postprocessing_ensure_container_insights_not_called_without_relevant_flags(self): + """ensure_container_insights_for_monitoring is NOT called when no relevant flags are set. + + When neither enable_addons, enable-azure-monitor-logs, nor any CNL/HLSM flag is + present in raw_params, the outer elif is not entered. + """ + dec = self._make_postprocessing_decorator({}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_not_called() + + def test_postprocessing_create_dcr_true_when_enable_container_network_logs(self): + """create_dcr=True when enable_container_network_logs is set. + + enable_container_network_logs is in cnl_or_hlsm_changing, so create_dcr must be True. + get_enable_high_log_scale_mode is mocked to bypass ACNS/monitoring validation. + """ + dec = self._make_postprocessing_decorator({"enable_container_network_logs": True}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm, patch.object( + dec.context, "get_enable_high_log_scale_mode", return_value=True + ): + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + + def test_postprocessing_create_dcr_true_when_enable_retina_flow_logs(self): + """create_dcr=True when the deprecated enable_retina_flow_logs flag is set. + + enable_retina_flow_logs is in cnl_or_hlsm_changing, so create_dcr must be True. + """ + dec = self._make_postprocessing_decorator({"enable_retina_flow_logs": True}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm, patch.object( + dec.context, "get_enable_high_log_scale_mode", return_value=True + ): + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + + def test_postprocessing_create_dcr_true_when_disable_container_network_logs(self): + """create_dcr=True when disable_container_network_logs is set. + + disable_container_network_logs is in cnl_or_hlsm_changing, so create_dcr must be True. + """ + dec = self._make_postprocessing_decorator({"disable_container_network_logs": True}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + + def test_postprocessing_create_dcr_true_when_disable_retina_flow_logs(self): + """create_dcr=True when the deprecated disable_retina_flow_logs flag is set. + + disable_retina_flow_logs is in cnl_or_hlsm_changing, so create_dcr must be True. + """ + dec = self._make_postprocessing_decorator({"disable_retina_flow_logs": True}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + + def test_postprocessing_create_dcr_true_when_enable_high_log_scale_mode_true(self): + """create_dcr=True when enable_high_log_scale_mode=True is set. + + enable_high_log_scale_mode is in cnl_or_hlsm_changing (any non-None value counts), + so create_dcr must be True. + """ + dec = self._make_postprocessing_decorator({"enable_high_log_scale_mode": True}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + + def test_postprocessing_create_dcr_true_when_enable_high_log_scale_mode_false(self): + """create_dcr=True when enable_high_log_scale_mode=False is explicitly set. + + The cnl_or_hlsm_changing check uses `is not None`, so even an explicit False + value means the DCR must be updated. + """ + dec = self._make_postprocessing_decorator({"enable_high_log_scale_mode": False}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + + def test_postprocessing_create_dcr_true_when_cnl_and_hlsm_both_set(self): + """create_dcr=True when both enable_container_network_logs and enable_high_log_scale_mode are set. + + Both flags are individually sufficient to set cnl_or_hlsm_changing=True. + """ + dec = self._make_postprocessing_decorator( + {"enable_container_network_logs": True, "enable_high_log_scale_mode": True} + ) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm, patch.object( + dec.context, "get_enable_high_log_scale_mode", return_value=True + ): + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + def test_set_up_health_monitor_profile(self): # no flag - no change @@ -12899,6 +13091,8 @@ def test_enable_container_network_logs(self): }, ) self.assertEqual(dec_mc_1, ground_truth_mc_1) + # Verify HLSM is auto-enabled when CNL is enabled + self.assertTrue(dec_1.context.get_enable_high_log_scale_mode()) # Case 2: acns is enabled, monitoring is enabled, disable retina network flow logs dec_2 = AKSPreviewManagedClusterUpdateDecorator( @@ -13143,6 +13337,8 @@ def test_enable_container_network_logs(self): }, ) self.assertEqual(dec_mc_7, ground_truth_mc_7) + # Verify HLSM is auto-enabled when using deprecated flag + self.assertTrue(dec_7.context.get_enable_high_log_scale_mode()) # Case 8: Error when explicitly disabling high log scale mode with container network logs enabled dec_8 = AKSPreviewManagedClusterUpdateDecorator( @@ -13272,6 +13468,441 @@ def test_enable_container_network_logs(self): } self.assertEqual(dec_mc_11.addon_profiles["omsagent"], ground_truth_mc_11["omsagent"]) + # Case 12: Verify monitoring_addon_postprocessing_required is set when CNL is enabled (update path) + # This test verifies the fix for the bug where DCR is not updated when enabling CNL on update + dec_12 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_12 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + dec_12.context.attach_mc(mc_12) + dec_mc_12 = dec_12.update_monitoring_profile_flow_logs(mc_12) + # Verify the intermediate is set to trigger DCR update in postprocessing + self.assertTrue(dec_12.context.get_intermediate("monitoring_addon_postprocessing_required")) + # Verify HLSM is auto-enabled when CNL is enabled + self.assertTrue(dec_12.context.get_enable_high_log_scale_mode()) + ground_truth_mc_12 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "True"} + ) + }, + ) + self.assertEqual(dec_mc_12, ground_truth_mc_12) + + # Case 13: Verify monitoring_addon_postprocessing_required is NOT set when CNL is disabled (update path) + # Disabling CNL should not trigger DCR update + dec_13 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_container_network_logs": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_13 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "True"} + ) + }, + ) + dec_13.context.attach_mc(mc_13) + dec_mc_13 = dec_13.update_monitoring_profile_flow_logs(mc_13) + # Disabling CNL should not set the postprocessing intermediate + self.assertFalse(dec_13.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False)) + ground_truth_mc_13 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "False"} + ) + }, + ) + self.assertEqual(dec_mc_13, ground_truth_mc_13) + + # Case 14: Verify monitoring_addon_postprocessing_required is set when using deprecated flag (update path) + dec_14 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_retina_flow_logs": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_14 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + dec_14.context.attach_mc(mc_14) + dec_mc_14 = dec_14.update_monitoring_profile_flow_logs(mc_14) + # Verify the intermediate is set to trigger DCR update in postprocessing + self.assertTrue(dec_14.context.get_intermediate("monitoring_addon_postprocessing_required")) + # Verify HLSM is auto-enabled when using deprecated flag + self.assertTrue(dec_14.context.get_enable_high_log_scale_mode()) + + # Case 15: Standalone HLSM enable with monitoring addon enabled and MSI auth + # This test verifies the fix for the bug where standalone --enable-high-log-scale-mode was silently ignored + dec_15 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_15 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec_15.context.attach_mc(mc_15) + dec_mc_15 = dec_15.update_monitoring_profile_flow_logs(mc_15) + # Verify the intermediate is set to trigger DCR update in postprocessing + self.assertTrue(dec_15.context.get_intermediate("monitoring_addon_postprocessing_required")) + + # Case 16: Standalone HLSM enable without monitoring addon (should error) + dec_16 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_16 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + ), + ) + dec_16.context.attach_mc(mc_16) + with self.assertRaises(RequiredArgumentMissingError): + dec_16.update_monitoring_profile_flow_logs(mc_16) + + # Case 17: Standalone HLSM enable without MSI auth (should error) + dec_17 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_17 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={} + ) + }, + ) + dec_17.context.attach_mc(mc_17) + with self.assertRaises(RequiredArgumentMissingError): + dec_17.update_monitoring_profile_flow_logs(mc_17) + + # Case 18: Verify HLSM is NOT triggered when explicitly set to False + dec_18 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": False, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_18 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec_18.context.attach_mc(mc_18) + dec_mc_18 = dec_18.update_monitoring_profile_flow_logs(mc_18) + # HLSM=false should not trigger postprocessing + self.assertFalse(dec_18.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False)) + + # Case 19: Standalone HLSM enable with omsAgent (camelCase key) + dec_19 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_19 = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + ), + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec_19.context.attach_mc(mc_19) + dec_mc_19 = dec_19.update_monitoring_profile_flow_logs(mc_19) + # Verify the intermediate is set to trigger DCR update in postprocessing + self.assertTrue(dec_19.context.get_intermediate("monitoring_addon_postprocessing_required")) + + def test_update_standalone_high_log_scale_mode(self): + """Tests for Bug 1 fix: --enable-high-log-scale-mode standalone on update path. + + Before this fix, --enable-high-log-scale-mode alone on an existing cluster was silently + ignored because monitoring_addon_postprocessing_required was never set, so the DCR was + never updated. + """ + # Case 1: Happy path - monitoring enabled with MSI auth → sets postprocessing intermediate + dec_1 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_1 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_1.context.attach_mc(mc_1) + dec_1.update_monitoring_profile_flow_logs(mc_1) + self.assertTrue(dec_1.context.get_intermediate("monitoring_addon_postprocessing_required")) + + # Case 2: Monitoring addon present but not enabled → RequiredArgumentMissingError + dec_2 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_2 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=False, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_2.context.attach_mc(mc_2) + with self.assertRaises(RequiredArgumentMissingError): + dec_2.update_monitoring_profile_flow_logs(mc_2) + + # Case 3: No monitoring addon at all → RequiredArgumentMissingError + dec_3 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_3 = self.models.ManagedCluster(location="test_location") + dec_3.context.attach_mc(mc_3) + with self.assertRaises(RequiredArgumentMissingError): + dec_3.update_monitoring_profile_flow_logs(mc_3) + + # Case 4: Monitoring enabled but MSI auth missing → RequiredArgumentMissingError + dec_4 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_4 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={}, + ) + }, + ) + dec_4.context.attach_mc(mc_4) + with self.assertRaises(RequiredArgumentMissingError): + dec_4.update_monitoring_profile_flow_logs(mc_4) + + # Case 5: Monitoring enabled but MSI auth set to "false" → RequiredArgumentMissingError + dec_5 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_5 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "false"}, + ) + }, + ) + dec_5.context.attach_mc(mc_5) + with self.assertRaises(RequiredArgumentMissingError): + dec_5.update_monitoring_profile_flow_logs(mc_5) + + # Case 6: enable_high_log_scale_mode=False → no postprocessing, no error + dec_6 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": False}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_6 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_6.context.attach_mc(mc_6) + dec_6.update_monitoring_profile_flow_logs(mc_6) + self.assertFalse( + dec_6.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + # Case 7: Not specified (None) → no postprocessing triggered + dec_7 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_7 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_7.context.attach_mc(mc_7) + dec_7.update_monitoring_profile_flow_logs(mc_7) + self.assertFalse( + dec_7.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + # Case 8: camelCase "omsAgent" key is also recognised + dec_8 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_high_log_scale_mode": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_8 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec_8.context.attach_mc(mc_8) + dec_8.update_monitoring_profile_flow_logs(mc_8) + self.assertTrue(dec_8.context.get_intermediate("monitoring_addon_postprocessing_required")) + def test_update_node_provisioning_profile(self): dec_0 = AKSPreviewManagedClusterUpdateDecorator( self.cmd, From b41a130d6dd2b85d20eb52b785470bb9fad22295 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 11 Mar 2026 10:32:13 +0000 Subject: [PATCH 02/32] Fix tests + lint --- .../managed_cluster_decorator.py | 64 +++++++++++++++++-- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 45c9630b0c8..db4023f36b8 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -5279,6 +5279,19 @@ def immediate_processing_after_request(self, mc: ManagedCluster) -> None: "Could not create a role assignment for subnet. Are you an Owner on this subscription?" ) + def _should_create_dcra(self) -> bool: + """Return True if any flag that triggers a DCRA/DCR create or update was provided.""" + params = self.context.raw_param + return ( + params.get("enable_addons") is not None or + params.get("enable-azure-monitor-logs") is not None or + params.get("enable_container_network_logs") is not None or + params.get("enable_retina_flow_logs") is not None or + params.get("disable_container_network_logs") is not None or + params.get("disable_retina_flow_logs") is not None or + params.get("enable_high_log_scale_mode") is not None + ) + # pylint: disable=too-many-locals,too-many-branches def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: """Postprocessing performed after the cluster is created. @@ -5304,13 +5317,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.external_functions.add_monitoring_role_assignment( cluster, cluster_resource_id, self.cmd ) - elif (self.context.raw_param.get("enable_addons") is not None or - self.context.raw_param.get("enable-azure-monitor-logs") is not None or - self.context.raw_param.get("enable_container_network_logs") is not None or - self.context.raw_param.get("enable_retina_flow_logs") is not None or - self.context.raw_param.get("disable_container_network_logs") is not None or - self.context.raw_param.get("disable_retina_flow_logs") is not None or - self.context.raw_param.get("enable_high_log_scale_mode") is not None): + elif self._should_create_dcra(): # Create/update the DCR when CNL or HLSM flags change so that the DCR streams # (e.g. Microsoft-ContainerLogV2-HighScale) are kept in sync. cnl_or_hlsm_changing = ( @@ -5820,6 +5827,49 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus config = monitoring_addon_profile.config or {} config["enableRetinaNetworkFlags"] = str(container_network_logs_enabled) mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config = config + + # When enabling CNL, the DCR must be updated to add the high-scale stream. + # Set the postprocessing intermediate so that the update path calls ensure_container_insights. + if self.context.raw_param.get("enable_container_network_logs") or \ + self.context.raw_param.get("enable_retina_flow_logs"): + self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) + + # When --enable-high-log-scale-mode is passed standalone on the update path, validate that + # monitoring with MSI auth is already enabled, then trigger the DCR update via postprocessing. + enable_high_log_scale_mode = self.context.raw_param.get("enable_high_log_scale_mode") + if enable_high_log_scale_mode is True: + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") + + # Resolve the addon profile, handling both "omsagent" and "omsAgent" key variants. + monitoring_addon_profile = None + if mc.addon_profiles: + monitoring_addon_profile = ( + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + ) + + if not monitoring_addon_profile or not monitoring_addon_profile.enabled: + raise RequiredArgumentMissingError( + "--enable-high-log-scale-mode requires the Azure Monitor logs addon (omsagent) " + "to be enabled on the cluster. Please enable it first with " + "--enable-addons monitoring or --enable-azure-monitor-logs." + ) + + addon_config = monitoring_addon_profile.config or {} + msi_auth_enabled = ( + CONST_MONITORING_USING_AAD_MSI_AUTH in addon_config and + str(addon_config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == "true" + ) + if not msi_auth_enabled: + raise RequiredArgumentMissingError( + "--enable-high-log-scale-mode requires MSI authentication to be enabled " + "for the monitoring addon. Please enable it with --enable-msi-auth-for-monitoring." + ) + + self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) + return mc # pylint: disable=too-many-statements,too-many-locals,too-many-branches From 2afb48ce73ac9afe5002194cec856b2264ba759a Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 12 Mar 2026 15:33:31 +0000 Subject: [PATCH 03/32] Add unit tests --- .../latest/test_managed_cluster_decorator.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index fbc7bd1248c..b6f4f41f931 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -4872,6 +4872,23 @@ def test_get_enable_high_log_scale_mode_explicit_true(self): result = ctx.get_enable_high_log_scale_mode() self.assertTrue(result) + def test_get_enable_high_log_scale_mode_flag_without_value_create(self): + """Test passing --enable-high-log-scale-mode without an explicit boolean value. + + When using get_three_state_flag() and the user passes the flag without a value + (e.g. --enable-high-log-scale-mode), argparse sets it to True via nargs='?'. + This should behave identically to passing --enable-high-log-scale-mode true. + """ + # get_three_state_flag uses nargs='?'; no value => True + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({"enable_high_log_scale_mode": True}), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + result = ctx.get_enable_high_log_scale_mode() + self.assertTrue(result) + def test_get_enable_high_log_scale_mode_auto_enable_with_container_network_logs(self): """Test auto-enable when container network logs are enabled with proper prerequisites.""" ctx = AKSPreviewManagedClusterContext( @@ -5035,6 +5052,31 @@ def test_get_enable_high_log_scale_mode_update_error_without_existing_monitoring with self.assertRaises(RequiredArgumentMissingError): ctx.get_enable_high_log_scale_mode() + def test_get_enable_high_log_scale_mode_flag_without_value_update(self): + """Test passing --enable-high-log-scale-mode without an explicit boolean in update mode. + + When using get_three_state_flag() and the user passes the flag without a value + (e.g. --enable-high-log-scale-mode), argparse sets it to True via nargs='?'. + In update mode this should enable HLSM on the existing cluster. + """ + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({"enable_high_log_scale_mode": True}), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + ctx.attach_mc(mc) + result = ctx.get_enable_high_log_scale_mode() + self.assertTrue(result) + def test_get_container_network_logs_returns_none_when_not_specified(self): """Test get_container_network_logs returns None when neither enable nor disable is specified.""" ctx = AKSPreviewManagedClusterContext( From c3ebe8ee11e9e0e46e36800b10e795d462bac6c2 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 12 Mar 2026 16:43:07 +0000 Subject: [PATCH 04/32] More tests --- .../latest/test_managed_cluster_decorator.py | 337 ++++++++++++++++++ 1 file changed, 337 insertions(+) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index b6f4f41f931..04394ef25b9 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -5237,6 +5237,161 @@ def test_get_container_network_logs_legacy_retina_flow_logs_param(self): result = ctx.get_container_network_logs(mc) self.assertTrue(result) + def test_get_container_network_logs_with_azure_monitor_logs(self): + """Test get_container_network_logs succeeds when monitoring is enabled via enable_azure_monitor_logs param.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_container_network_logs": True, + "enable_acns": True, + "enable_azure_monitor_logs": True, + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + ) + ctx.attach_mc(mc) + result = ctx.get_container_network_logs(mc) + self.assertTrue(result) + + def test_get_container_network_logs_legacy_disable_retina_flow_logs(self): + """Test get_container_network_logs returns False when legacy disable_retina_flow_logs is specified.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "disable_retina_flow_logs": True, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster(location="test_location") + ctx.attach_mc(mc) + result = ctx.get_container_network_logs(mc) + self.assertFalse(result) + + def test_get_container_network_logs_with_acns_already_on_mc(self): + """Test get_container_network_logs succeeds when ACNS is already enabled on mc (not via raw param).""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_container_network_logs": True, + "enable_addons": "monitoring", + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + ) + ctx.attach_mc(mc) + result = ctx.get_container_network_logs(mc) + self.assertTrue(result) + + def test_get_enable_high_log_scale_mode_cnl_with_explicit_true(self): + """Test when user enables both CNL and HLSM=True explicitly. Should succeed and return True.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_container_network_logs": True, + "enable_high_log_scale_mode": True, + "enable_acns": True, + "enable_addons": "monitoring", + }), + self.models, + decorator_mode=DecoratorMode.CREATE, + ) + result = ctx.get_enable_high_log_scale_mode() + self.assertTrue(result) + + def test_get_enable_high_log_scale_mode_update_explicit_false_without_cnl(self): + """Test that HLSM=False without CNL returns False in update mode without error.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_high_log_scale_mode": False, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + ctx.attach_mc(mc) + result = ctx.get_enable_high_log_scale_mode() + self.assertFalse(result) + + def test_get_enable_high_log_scale_mode_update_error_explicit_false_with_cnl(self): + """Test error when user explicitly disables HLSM with CNL enabled in update mode.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_container_network_logs": True, + "enable_high_log_scale_mode": False, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + ctx.attach_mc(mc) + with self.assertRaises(MutuallyExclusiveArgumentError): + ctx.get_enable_high_log_scale_mode() + + def test_get_enable_high_log_scale_mode_update_monitoring_camelcase_key(self): + """Test auto-enable HLSM in update mode when monitoring uses camelCase 'omsAgent' key.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_container_network_logs": True, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + ctx.attach_mc(mc) + result = ctx.get_enable_high_log_scale_mode() + self.assertTrue(result) + def test_get_enable_default_domain(self): # default value ctx_1 = AKSPreviewManagedClusterContext( @@ -5921,6 +6076,65 @@ def test_set_up_addon_profiles_auto_enables_high_log_scale_mode_with_cnl(self): # Verify high log scale mode is auto-enabled self.assertTrue(dec.context.get_enable_high_log_scale_mode()) + def test_set_up_addon_profiles_cnl_and_hlsm_flag_without_value(self): + """Regression test: CREATE with --enable-container-network-logs --enable-acns + --enable-addons monitoring --enable-high-log-scale-mode (flag without boolean value). + + When --enable-high-log-scale-mode is passed without a value, get_three_state_flag() + sets it to True via nargs='?'. This must NOT trigger the 'Container network logs + requires --enable-acns...' error when ACNS and monitoring are both provided. + """ + dec = AKSPreviewManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_addons": "monitoring", + "enable_container_network_logs": True, + "enable_high_log_scale_mode": True, # simulates --enable-high-log-scale-mode without value + "enable_acns": True, + "workspace_resource_id": "test_workspace_resource_id", + "enable_msi_auth_for_monitoring": True, + "enable_syslog": False, + "data_collection_settings": None, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + network_profile = self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking(enabled=True), + ) + mc = self.models.ManagedCluster(location="test_location", network_profile=network_profile) + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test_subscription_id") + external_functions = dec.context.external_functions + with patch.object(external_functions, 'ensure_container_insights_for_monitoring', return_value=None): + # Should NOT raise InvalidArgumentValueError + dec_mc = dec.set_up_addon_profiles(mc) + self.assertTrue(dec.context.get_enable_high_log_scale_mode()) + + def test_set_up_addon_profiles_hlsm_only_no_cnl(self): + """Test that enabling HLSM without CNL does not set enableRetinaNetworkFlags in config.""" + dec = AKSPreviewManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_addons": "monitoring", + "enable_high_log_scale_mode": True, + "workspace_resource_id": "test_workspace_resource_id", + "enable_msi_auth_for_monitoring": True, + "enable_syslog": False, + "data_collection_settings": None, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster(location="test_location") + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test_subscription_id") + external_functions = dec.context.external_functions + with patch.object(external_functions, 'ensure_container_insights_for_monitoring', return_value=None): + dec_mc = dec.set_up_addon_profiles(mc) + # enableRetinaNetworkFlags should NOT be set when CNL is not enabled + omsagent_config = dec_mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config + self.assertNotIn("enableRetinaNetworkFlags", omsagent_config) def test_set_up_http_proxy_config(self): dec_1 = AKSPreviewManagedClusterCreateDecorator( @@ -13945,6 +14159,129 @@ def test_update_standalone_high_log_scale_mode(self): dec_8.update_monitoring_profile_flow_logs(mc_8) self.assertTrue(dec_8.context.get_intermediate("monitoring_addon_postprocessing_required")) + def test_update_monitoring_profile_flow_logs_no_flags_noop(self): + """Test that update_monitoring_profile_flow_logs is a no-op when no CNL/HLSM flags are specified.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "True"}, + ) + }, + ) + dec.context.attach_mc(mc) + dec_mc = dec.update_monitoring_profile_flow_logs(mc) + # Existing config should remain unchanged + self.assertEqual( + dec_mc.addon_profiles["omsagent"].config["enableRetinaNetworkFlags"], + "True", + ) + self.assertFalse( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + def test_update_enable_cnl_with_azure_monitor_logs_on_cluster(self): + """Test enabling CNL on update when monitoring was enabled via enable_azure_monitor_logs on existing cluster.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + "enable_azure_monitor_logs": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + dec.context.attach_mc(mc) + dec_mc = dec.update_monitoring_profile_flow_logs(mc) + self.assertEqual( + dec_mc.addon_profiles["omsagent"].config["enableRetinaNetworkFlags"], + "True", + ) + self.assertTrue(dec.context.get_intermediate("monitoring_addon_postprocessing_required")) + + def test_update_cnl_explicit_true_hlsm_with_prerequisites(self): + """Test enabling CNL + HLSM=True explicitly on update with all prerequisites met.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + "enable_high_log_scale_mode": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec.context.attach_mc(mc) + dec_mc = dec.update_monitoring_profile_flow_logs(mc) + self.assertEqual( + dec_mc.addon_profiles["omsagent"].config["enableRetinaNetworkFlags"], + "True", + ) + self.assertTrue(dec.context.get_enable_high_log_scale_mode()) + self.assertTrue(dec.context.get_intermediate("monitoring_addon_postprocessing_required")) + + def test_update_disable_hlsm_standalone_no_postprocessing(self): + """Test that disabling HLSM standalone (no CNL flag) does not trigger postprocessing.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": False, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec.context.attach_mc(mc) + dec.update_monitoring_profile_flow_logs(mc) + self.assertFalse( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + def test_update_node_provisioning_profile(self): dec_0 = AKSPreviewManagedClusterUpdateDecorator( self.cmd, From 1dd472686da6913b96e34b76d083e10f453f26d2 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 12 Mar 2026 16:51:39 +0000 Subject: [PATCH 05/32] Update history --- src/aks-preview/HISTORY.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/aks-preview/HISTORY.rst b/src/aks-preview/HISTORY.rst index 16968a8a68b..7fda2a870ce 100644 --- a/src/aks-preview/HISTORY.rst +++ b/src/aks-preview/HISTORY.rst @@ -55,6 +55,11 @@ Pending * `az aks approuting gateway istio enable/disable`: Add new subcommands to enable or disable the Istio Gateway API implementation for App Routing on an existing cluster. * Add 'mTLS' as a transit encryption type option for `--acns-transit-encryption-type` in `az aks create/update` +19.0.0b26 ++++++++ +* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. +* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled. + 19.0.0b25 +++++++ * `az aks create`: Add `--enable-continuous-control-plane-and-addon-monitor` to enable continuous control plane and addon monitor. From 76f815e940dea5f7f7fbe6ce9f9dd360f8fe4917 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Fri, 13 Mar 2026 15:45:21 +0000 Subject: [PATCH 06/32] Fix addon and msi auth bug --- .../managed_cluster_decorator.py | 45 +++++-- .../latest/test_managed_cluster_decorator.py | 123 ++++++++++++++++++ 2 files changed, 160 insertions(+), 8 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index db4023f36b8..f8fb48f3554 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -428,7 +428,24 @@ def get_enable_msi_auth_for_monitoring(self) -> Union[bool, None]: elif enable_azure_monitor_logs: result = True elif enable_msi_auth_for_monitoring is False: - result = False + # The base class returns False when service_principal_profile.client_id is not None, + # but MSI-based clusters set client_id to "msi". Check if the monitoring addon + # already has useAADAuth=true, which indicates MSI auth is actually in use. + addon_consts = self.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") + if self.mc and self.mc.addon_profiles: + monitoring_profile = ( + self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or + self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + ) + if (monitoring_profile and monitoring_profile.config and + str(monitoring_profile.config.get(CONST_MONITORING_USING_AAD_MSI_AUTH, "")).lower() == "true"): + result = True + else: + result = False + else: + result = False elif enable_msi_auth_for_monitoring is None and not disable_msi_auth and not enable_msi_auth: result = True else: @@ -5329,9 +5346,14 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: ) addon_consts = self.context.get_addon_consts() CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + # The API response may return the addon key as either "omsagent" or "omsAgent" + monitoring_addon_key = CONST_MONITORING_ADDON_NAME + if CONST_MONITORING_ADDON_NAME not in cluster.addon_profiles and \ + CONST_MONITORING_ADDON_NAME_CAMELCASE in cluster.addon_profiles: + monitoring_addon_key = CONST_MONITORING_ADDON_NAME_CAMELCASE self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, - cluster.addon_profiles[CONST_MONITORING_ADDON_NAME], + cluster.addon_profiles[monitoring_addon_key], self.context.get_subscription_id(), self.context.get_resource_group_name(), self.context.get_name(), @@ -8098,14 +8120,21 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - if (cluster.addon_profiles and - CONST_MONITORING_ADDON_NAME in cluster.addon_profiles and - cluster.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled): + # The API response may return the addon key as either "omsagent" or "omsAgent" + monitoring_addon_key = None + if cluster.addon_profiles: + if CONST_MONITORING_ADDON_NAME in cluster.addon_profiles: + monitoring_addon_key = CONST_MONITORING_ADDON_NAME + elif CONST_MONITORING_ADDON_NAME_CAMELCASE in cluster.addon_profiles: + monitoring_addon_key = CONST_MONITORING_ADDON_NAME_CAMELCASE + + if (monitoring_addon_key and + cluster.addon_profiles[monitoring_addon_key].enabled): # Check if MSI auth is enabled if (CONST_MONITORING_USING_AAD_MSI_AUTH in - cluster.addon_profiles[CONST_MONITORING_ADDON_NAME].config and - str(cluster.addon_profiles[CONST_MONITORING_ADDON_NAME].config[ + cluster.addon_profiles[monitoring_addon_key].config and + str(cluster.addon_profiles[monitoring_addon_key].config[ CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == "true"): # Check parameter sizes to identify what might be causing large headers @@ -8120,7 +8149,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, - cluster.addon_profiles[CONST_MONITORING_ADDON_NAME], + cluster.addon_profiles[monitoring_addon_key], self.context.get_subscription_id(), self.context.get_resource_group_name(), self.context.get_name(), diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index 04394ef25b9..3428fa7df3a 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -43,6 +43,7 @@ CONST_APP_ROUTING_ISTIO_MODE_ENABLED, CONST_APP_ROUTING_ISTIO_MODE_DISABLED, CONST_MONITORING_ADDON_NAME, + CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, CONST_MONITORING_USING_AAD_MSI_AUTH, CONST_NODEPOOL_MODE_SYSTEM, @@ -5392,6 +5393,36 @@ def test_get_enable_high_log_scale_mode_update_monitoring_camelcase_key(self): result = ctx.get_enable_high_log_scale_mode() self.assertTrue(result) + def test_get_enable_msi_auth_for_monitoring_with_msi_service_principal(self): + """Test that MSI auth is correctly detected when service_principal_profile.client_id='msi'. + + The base class returns False when client_id is not None, but MSI-based clusters set + client_id to 'msi'. The preview override should check the addon config for useAADAuth. + """ + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({}), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + service_principal_profile=self.models.ManagedClusterServicePrincipalProfile( + client_id="msi", + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + ctx.attach_mc(mc) + result = ctx.get_enable_msi_auth_for_monitoring() + self.assertTrue(result) + def test_get_enable_default_domain(self): # default value ctx_1 = AKSPreviewManagedClusterContext( @@ -8357,6 +8388,33 @@ def test_postprocessing_create_dcr_true_when_cnl_and_hlsm_both_set(self): _, kwargs = mock_ecifm.call_args self.assertTrue(kwargs["create_dcr"]) + def test_postprocessing_create_dcr_true_with_camelcase_addon_key(self): + """create_dcr=True when the API response uses the camelCase 'omsAgent' addon key. + + The API may return 'omsAgent' instead of 'omsagent'. The postprocessing must + handle both key variants to ensure the DCR is updated. + """ + dec = self._make_postprocessing_decorator( + {"enable_container_network_logs": True, "enable_high_log_scale_mode": True} + ) + # Build cluster with camelCase addon key (as the API might return) + cluster = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True) + }, + ) + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm, patch.object( + dec.context, "get_enable_high_log_scale_mode", return_value=True + ): + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + def test_set_up_health_monitor_profile(self): # no flag - no change @@ -14282,6 +14340,71 @@ def test_update_disable_hlsm_standalone_no_postprocessing(self): dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) ) + def test_update_postprocessing_with_camelcase_addon_key(self): + """Test that update postprocessing works when the API response uses 'omsAgent' (camelCase). + + The API may return the monitoring addon as 'omsAgent' instead of 'omsagent'. + The postprocessing must handle both key variants so the DCR gets updated. + """ + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_container_network_logs": True, + "enable_high_log_scale_mode": True, + "name": "test_name", + "resource_group_name": "test_rg_name", + "location": "test_location", + "enable_msi_auth_for_monitoring": True, + "enable_syslog": False, + "data_collection_settings": None, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test_subscription_id") + # Simulate profile update setting the postprocessing flag + dec.update_monitoring_profile_flow_logs(mc) + self.assertTrue(dec.context.get_intermediate("monitoring_addon_postprocessing_required")) + + # Build API response cluster with camelCase addon key + cluster = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + self.assertTrue(kwargs["enable_high_log_scale_mode"]) + def test_update_node_provisioning_profile(self): dec_0 = AKSPreviewManagedClusterUpdateDecorator( self.cmd, From 57758255325afdd58dcc5370aa5ed8a2d2b24a26 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Fri, 13 Mar 2026 16:00:15 +0000 Subject: [PATCH 07/32] Lint --- .../azext_aks_preview/managed_cluster_decorator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index f8fb48f3554..d2871f566d1 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -439,11 +439,11 @@ def get_enable_msi_auth_for_monitoring(self) -> Union[bool, None]: self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) ) - if (monitoring_profile and monitoring_profile.config and - str(monitoring_profile.config.get(CONST_MONITORING_USING_AAD_MSI_AUTH, "")).lower() == "true"): - result = True - else: - result = False + result = bool( + monitoring_profile and monitoring_profile.config and + str(monitoring_profile.config.get( + CONST_MONITORING_USING_AAD_MSI_AUTH, "")).lower() == "true" + ) else: result = False elif enable_msi_auth_for_monitoring is None and not disable_msi_auth and not enable_msi_auth: From 2b5b9c8bf43540073441bcff842ccbd05e11cd68 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 16 Mar 2026 12:36:43 +0000 Subject: [PATCH 08/32] Fix update validation + tests --- .../managed_cluster_decorator.py | 44 ++++++++++ .../latest/test_managed_cluster_decorator.py | 81 ++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index d2871f566d1..01328510011 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -3028,6 +3028,27 @@ def get_enable_high_log_scale_mode(self) -> Union[bool, None]: # Auto-enable high log scale mode return True + # If user explicitly disables HLSM, check if CNL is already enabled on the cluster + if enable_high_log_scale_mode is False: + cnl_already_enabled = False + if self.mc and self.mc.addon_profiles: + addon_consts = self.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + monitoring_profile = ( + self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or + self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + ) + if monitoring_profile and monitoring_profile.config: + cnl_already_enabled = str( + monitoring_profile.config.get("enableRetinaNetworkFlags", "") + ).lower() == "true" + if cnl_already_enabled: + raise MutuallyExclusiveArgumentError( + "Cannot explicitly disable --enable-high-log-scale-mode while " + "container network logs are enabled on the cluster. " + "Please disable container network logs first with --disable-container-network-logs." + ) + # If container network logs are not being enabled, return the original value # Return False if not explicitly set to maintain backward compatibility with base class if enable_high_log_scale_mode is None: @@ -5650,6 +5671,7 @@ def get_special_parameter_default_value_pairs_list(self) -> List[Tuple[Any, Any] (self.context.get_nat_gateway_managed_outbound_ipv6_count(), None), (self.context.get_nat_gateway_outbound_ip_ids(), None), (self.context.get_nat_gateway_outbound_ip_prefix_ids(), None), + (self.context.raw_param.get("enable_high_log_scale_mode"), None), ] def check_raw_parameters(self): @@ -5892,6 +5914,28 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) + elif enable_high_log_scale_mode is False: + # Check if CNL is already enabled on the cluster — cannot disable HLSM while CNL is on + cnl_already_enabled = False + if mc.addon_profiles: + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + monitoring_profile = ( + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + ) + if monitoring_profile and monitoring_profile.config: + cnl_already_enabled = str( + monitoring_profile.config.get("enableRetinaNetworkFlags", "") + ).lower() == "true" + if cnl_already_enabled: + raise MutuallyExclusiveArgumentError( + "Cannot explicitly disable --enable-high-log-scale-mode while " + "container network logs are enabled on the cluster. " + "Please disable container network logs first with --disable-container-network-logs." + ) + self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) + return mc # pylint: disable=too-many-statements,too-many-locals,too-many-branches diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index 3428fa7df3a..44916660b54 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -5366,6 +5366,57 @@ def test_get_enable_high_log_scale_mode_update_error_explicit_false_with_cnl(sel with self.assertRaises(MutuallyExclusiveArgumentError): ctx.get_enable_high_log_scale_mode() + def test_get_enable_high_log_scale_mode_update_error_disable_hlsm_with_existing_cnl(self): + """Test error when user disables HLSM while CNL is already enabled on the cluster. + + When CNL (enableRetinaNetworkFlags) is already set to 'True' on the existing cluster + and the user passes --enable-high-log-scale-mode false without --enable-container-network-logs, + the method should raise a MutuallyExclusiveArgumentError. + """ + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_high_log_scale_mode": False, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "True"}, + ) + }, + ) + ctx.attach_mc(mc) + with self.assertRaises(MutuallyExclusiveArgumentError): + ctx.get_enable_high_log_scale_mode() + + def test_get_enable_high_log_scale_mode_update_error_disable_hlsm_with_existing_cnl_camelcase(self): + """Test error when user disables HLSM while CNL is already enabled (omsAgent camelCase key).""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_high_log_scale_mode": False, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "True"}, + ) + }, + ) + ctx.attach_mc(mc) + with self.assertRaises(MutuallyExclusiveArgumentError): + ctx.get_enable_high_log_scale_mode() + def test_get_enable_high_log_scale_mode_update_monitoring_camelcase_key(self): """Test auto-enable HLSM in update mode when monitoring uses camelCase 'omsAgent' key.""" ctx = AKSPreviewManagedClusterContext( @@ -14153,7 +14204,7 @@ def test_update_standalone_high_log_scale_mode(self): with self.assertRaises(RequiredArgumentMissingError): dec_5.update_monitoring_profile_flow_logs(mc_5) - # Case 6: enable_high_log_scale_mode=False → no postprocessing, no error + # Case 6: enable_high_log_scale_mode=False (no CNL) → postprocessing triggered to update DCR dec_6 = AKSPreviewManagedClusterUpdateDecorator( self.cmd, self.client, @@ -14171,7 +14222,7 @@ def test_update_standalone_high_log_scale_mode(self): ) dec_6.context.attach_mc(mc_6) dec_6.update_monitoring_profile_flow_logs(mc_6) - self.assertFalse( + self.assertTrue( dec_6.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) ) @@ -14340,6 +14391,32 @@ def test_update_disable_hlsm_standalone_no_postprocessing(self): dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) ) + def test_update_disable_hlsm_error_when_cnl_already_enabled(self): + """Test that disabling HLSM raises error when CNL is already enabled on the cluster.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": False, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "enableRetinaNetworkFlags": "True", + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + } + ) + }, + ) + dec.context.attach_mc(mc) + with self.assertRaises(MutuallyExclusiveArgumentError): + dec.update_monitoring_profile_flow_logs(mc) + def test_update_postprocessing_with_camelcase_addon_key(self): """Test that update postprocessing works when the API response uses 'omsAgent' (camelCase). From 6554065a623d9e84b36aba07e362af1e172363f0 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 16 Mar 2026 17:06:39 +0000 Subject: [PATCH 09/32] Fix disable cnls --- .../managed_cluster_decorator.py | 12 +++- .../latest/test_managed_cluster_decorator.py | 57 ++++++++++++++++++- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 01328510011..1081954ba08 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -5866,11 +5866,19 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus if mc.addon_profiles: addon_consts = self.context.get_addon_consts() CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - monitoring_addon_profile = mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) + # Handle both "omsagent" and "omsAgent" key variants + monitoring_addon_profile = ( + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + ) if monitoring_addon_profile: + monitoring_addon_key = ( + CONST_MONITORING_ADDON_NAME if CONST_MONITORING_ADDON_NAME in mc.addon_profiles + else CONST_MONITORING_ADDON_NAME_CAMELCASE + ) config = monitoring_addon_profile.config or {} config["enableRetinaNetworkFlags"] = str(container_network_logs_enabled) - mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config = config + mc.addon_profiles[monitoring_addon_key].config = config # When enabling CNL, the DCR must be updated to add the high-scale stream. # Set the postprocessing intermediate so that the update path calls ensure_container_insights. diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index 44916660b54..6c6014a4844 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -13942,6 +13942,59 @@ def test_enable_container_network_logs(self): ) self.assertEqual(dec_mc_13, ground_truth_mc_13) + # Case 13b: Disable CNL with omsAgent (camelCase key) - verifies the fix + # for the bug where disable-container-network-logs didn't work when Azure API + # returned the addon profile key as "omsAgent" instead of "omsagent" + dec_13b = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_container_network_logs": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc_13b = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "True"} + ) + }, + ) + dec_13b.context.attach_mc(mc_13b) + dec_mc_13b = dec_13b.update_monitoring_profile_flow_logs(mc_13b) + ground_truth_mc_13b = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + network_plugin="azure", + network_plugin_mode="overlay", + network_dataplane="cilium", + pod_cidr="100.64.0.0/16", + service_cidr="192.168.0.0/16", + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={"enableRetinaNetworkFlags": "False"} + ) + }, + ) + self.assertEqual(dec_mc_13b, ground_truth_mc_13b) + # Case 14: Verify monitoring_addon_postprocessing_required is set when using deprecated flag (update path) dec_14 = AKSPreviewManagedClusterUpdateDecorator( self.cmd, @@ -14074,8 +14127,8 @@ def test_enable_container_network_logs(self): ) dec_18.context.attach_mc(mc_18) dec_mc_18 = dec_18.update_monitoring_profile_flow_logs(mc_18) - # HLSM=false should not trigger postprocessing - self.assertFalse(dec_18.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False)) + # HLSM=false should trigger postprocessing to update DCR (remove high-scale stream) + self.assertTrue(dec_18.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False)) # Case 19: Standalone HLSM enable with omsAgent (camelCase key) dec_19 = AKSPreviewManagedClusterUpdateDecorator( From 6552b03803d1f8292a8761543b10425052a08c5a Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 16 Mar 2026 17:32:05 +0000 Subject: [PATCH 10/32] Fix unit tests --- .../tests/latest/test_managed_cluster_decorator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index 6c6014a4844..64f12ea8027 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -14417,8 +14417,8 @@ def test_update_cnl_explicit_true_hlsm_with_prerequisites(self): self.assertTrue(dec.context.get_enable_high_log_scale_mode()) self.assertTrue(dec.context.get_intermediate("monitoring_addon_postprocessing_required")) - def test_update_disable_hlsm_standalone_no_postprocessing(self): - """Test that disabling HLSM standalone (no CNL flag) does not trigger postprocessing.""" + def test_update_disable_hlsm_standalone_triggers_postprocessing(self): + """Test that disabling HLSM standalone (no CNL flag) triggers postprocessing to update DCR.""" dec = AKSPreviewManagedClusterUpdateDecorator( self.cmd, self.client, @@ -14440,7 +14440,7 @@ def test_update_disable_hlsm_standalone_no_postprocessing(self): ) dec.context.attach_mc(mc) dec.update_monitoring_profile_flow_logs(mc) - self.assertFalse( + self.assertTrue( dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) ) From 81580c2c4a7bc41c695fae6fee876d4ff09aebcc Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 18 Mar 2026 14:59:11 +0000 Subject: [PATCH 11/32] PR comment - refactoring --- .../managed_cluster_decorator.py | 193 +++++++++--------- .../latest/test_managed_cluster_decorator.py | 19 +- 2 files changed, 118 insertions(+), 94 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 1081954ba08..48175c92e1b 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -172,6 +172,21 @@ ResourceReference = TypeVar("ResourceReference") +def _get_monitoring_addon_key(cluster, addon_consts): + """Resolve the monitoring addon key from the cluster's addon_profiles. + + The API response may return the addon key as either "omsagent" or "omsAgent". + Returns the key present in addon_profiles, or the default constant if neither is found. + """ + const_monitoring = addon_consts.get("CONST_MONITORING_ADDON_NAME") + if cluster.addon_profiles: + if const_monitoring in cluster.addon_profiles: + return const_monitoring + if CONST_MONITORING_ADDON_NAME_CAMELCASE in cluster.addon_profiles: + return CONST_MONITORING_ADDON_NAME_CAMELCASE + return const_monitoring + + # pylint: disable=too-few-public-methods class AKSPreviewManagedClusterModels(AKSManagedClusterModels): """Store the models used in aks series of commands. @@ -5322,7 +5337,14 @@ def _should_create_dcra(self) -> bool: params = self.context.raw_param return ( params.get("enable_addons") is not None or - params.get("enable-azure-monitor-logs") is not None or + params.get("enable_azure_monitor_logs") is not None or + self._is_cnl_or_hlsm_changing() + ) + + def _is_cnl_or_hlsm_changing(self) -> bool: + """Return True if any CNL or High Log Scale Mode flag was provided.""" + params = self.context.raw_param + return ( params.get("enable_container_network_logs") is not None or params.get("enable_retina_flow_logs") is not None or params.get("disable_container_network_logs") is not None or @@ -5330,65 +5352,88 @@ def _should_create_dcra(self) -> bool: params.get("enable_high_log_scale_mode") is not None ) - # pylint: disable=too-many-locals,too-many-branches - def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: - """Postprocessing performed after the cluster is created. + def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: + """Handle monitoring addon postprocessing for the enable case.""" + enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() + if not enable_msi_auth_for_monitoring: + # add cluster spn/msi Monitoring Metrics Publisher role assignment to publish metrics to MDM + # mdm metrics is supported only in azure public cloud, so add the role assignment only in this cloud + cloud_name = self.cmd.cli_ctx.cloud.name + if cloud_name.lower() == "azurecloud": + cluster_resource_id = resource_id( + subscription=self.context.get_subscription_id(), + resource_group=self.context.get_resource_group_name(), + namespace="Microsoft.ContainerService", + type="managedClusters", + name=self.context.get_name(), + ) + self.context.external_functions.add_monitoring_role_assignment( + cluster, cluster_resource_id, self.cmd + ) + elif self._should_create_dcra(): + addon_consts = self.context.get_addon_consts() + monitoring_addon_key = _get_monitoring_addon_key(cluster, addon_consts) + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + cluster.addon_profiles[monitoring_addon_key], + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=False, + aad_route=self.context.get_enable_msi_auth_for_monitoring(), + create_dcr=self._is_cnl_or_hlsm_changing(), + create_dcra=True, + enable_syslog=self.context.get_enable_syslog(), + data_collection_settings=self.context.get_data_collection_settings(), + is_private_cluster=self.context.get_enable_private_cluster(), + ampls_resource_id=self.context.get_ampls_resource_id(), + enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), + ) - :return: None - """ - # monitoring addon - monitoring_addon_enabled = self.context.get_intermediate("monitoring_addon_enabled", default_value=False) - if monitoring_addon_enabled: - enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() - if not enable_msi_auth_for_monitoring: - # add cluster spn/msi Monitoring Metrics Publisher role assignment to publish metrics to MDM - # mdm metrics is supported only in azure public cloud, so add the role assignment only in this cloud - cloud_name = self.cmd.cli_ctx.cloud.name - if cloud_name.lower() == "azurecloud": - cluster_resource_id = resource_id( - subscription=self.context.get_subscription_id(), - resource_group=self.context.get_resource_group_name(), - namespace="Microsoft.ContainerService", - type="managedClusters", - name=self.context.get_name(), - ) - self.context.external_functions.add_monitoring_role_assignment( - cluster, cluster_resource_id, self.cmd - ) - elif self._should_create_dcra(): - # Create/update the DCR when CNL or HLSM flags change so that the DCR streams - # (e.g. Microsoft-ContainerLogV2-HighScale) are kept in sync. - cnl_or_hlsm_changing = ( - self.context.raw_param.get("enable_container_network_logs") is not None or - self.context.raw_param.get("enable_retina_flow_logs") is not None or - self.context.raw_param.get("disable_container_network_logs") is not None or - self.context.raw_param.get("disable_retina_flow_logs") is not None or - self.context.raw_param.get("enable_high_log_scale_mode") is not None - ) - addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - # The API response may return the addon key as either "omsagent" or "omsAgent" - monitoring_addon_key = CONST_MONITORING_ADDON_NAME - if CONST_MONITORING_ADDON_NAME not in cluster.addon_profiles and \ - CONST_MONITORING_ADDON_NAME_CAMELCASE in cluster.addon_profiles: - monitoring_addon_key = CONST_MONITORING_ADDON_NAME_CAMELCASE + def _postprocess_monitoring_disable(self) -> None: + """Handle monitoring addon postprocessing for the disable case.""" + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + + # Get the current cluster state to check config before it was disabled + current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) + + if (current_cluster.addon_profiles and + CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles): + + addon_profile = current_cluster.addon_profiles[CONST_MONITORING_ADDON_NAME] + + try: self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, - cluster.addon_profiles[monitoring_addon_key], + addon_profile, self.context.get_subscription_id(), self.context.get_resource_group_name(), self.context.get_name(), self.context.get_location(), - remove_monitoring=False, - aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=cnl_or_hlsm_changing, + remove_monitoring=True, + aad_route=True, + create_dcr=False, create_dcra=True, - enable_syslog=self.context.get_enable_syslog(), - data_collection_settings=self.context.get_data_collection_settings(), - is_private_cluster=self.context.get_enable_private_cluster(), - ampls_resource_id=self.context.get_ampls_resource_id(), - enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), + enable_syslog=False, + data_collection_settings=None, + ampls_resource_id=None, + enable_high_log_scale_mode=False ) + except TypeError: + pass + + # pylint: disable=too-many-locals,too-many-branches + def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: + """Postprocessing performed after the cluster is created. + + :return: None + """ + # monitoring addon + monitoring_addon_enabled = self.context.get_intermediate("monitoring_addon_enabled", default_value=False) + if monitoring_addon_enabled: + self._postprocess_monitoring_enable(cluster) # Handle monitoring addon postprocessing (disable case) - same logic as aks_disable_addons monitoring_addon_disable_postprocessing_required = self.context.get_intermediate( @@ -5396,39 +5441,7 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: ) if monitoring_addon_disable_postprocessing_required: - addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - - # Get the current cluster state to check config before it was disabled - current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) - - if (current_cluster.addon_profiles and - CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles): - - # Use the current cluster addon profile for cleanup - addon_profile = current_cluster.addon_profiles[CONST_MONITORING_ADDON_NAME] - - # Call ensure_container_insights_for_monitoring with remove_monitoring=True (same as aks_disable_addons) - try: - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - addon_profile, - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=True, - aad_route=True, - create_dcr=False, - create_dcra=True, - enable_syslog=False, - data_collection_settings=None, - ampls_resource_id=None, - enable_high_log_scale_mode=False - ) - except TypeError: - # Ignore TypeError just like aks_disable_addons does - pass + self._postprocess_monitoring_disable() # ingress appgw addon ingress_appgw_addon_enabled = self.context.get_intermediate("ingress_appgw_addon_enabled", default_value=False) @@ -8169,18 +8182,12 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: ) if monitoring_addon_postprocessing_required: addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - # The API response may return the addon key as either "omsagent" or "omsAgent" - monitoring_addon_key = None - if cluster.addon_profiles: - if CONST_MONITORING_ADDON_NAME in cluster.addon_profiles: - monitoring_addon_key = CONST_MONITORING_ADDON_NAME - elif CONST_MONITORING_ADDON_NAME_CAMELCASE in cluster.addon_profiles: - monitoring_addon_key = CONST_MONITORING_ADDON_NAME_CAMELCASE + monitoring_addon_key = _get_monitoring_addon_key(cluster, addon_consts) - if (monitoring_addon_key and + if (cluster.addon_profiles and + monitoring_addon_key in cluster.addon_profiles and cluster.addon_profiles[monitoring_addon_key].enabled): # Check if MSI auth is enabled diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index 64f12ea8027..887eb77484d 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -8304,7 +8304,7 @@ def test_postprocessing_create_dcr_false_when_only_enable_addons(self): def test_postprocessing_ensure_container_insights_not_called_without_relevant_flags(self): """ensure_container_insights_for_monitoring is NOT called when no relevant flags are set. - When neither enable_addons, enable-azure-monitor-logs, nor any CNL/HLSM flag is + When neither enable_addons, enable_azure_monitor_logs, nor any CNL/HLSM flag is present in raw_params, the outer elif is not entered. """ dec = self._make_postprocessing_decorator({}) @@ -8466,6 +8466,23 @@ def test_postprocessing_create_dcr_true_with_camelcase_addon_key(self): _, kwargs = mock_ecifm.call_args self.assertTrue(kwargs["create_dcr"]) + def test_postprocessing_create_dcr_false_when_enable_azure_monitor_logs(self): + """create_dcr=False when enable_azure_monitor_logs triggers ensure_container_insights_for_monitoring. + + enable_azure_monitor_logs is in the outer _should_create_dcra condition but NOT in + _is_cnl_or_hlsm_changing, so create_dcr must be False. + """ + dec = self._make_postprocessing_decorator({"enable_azure_monitor_logs": True}) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertFalse(kwargs["create_dcr"]) + def test_set_up_health_monitor_profile(self): # no flag - no change From c1afbcd0a42e7bc3c62dc95e68dcd3fd569a9d3a Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 18 Mar 2026 15:09:59 +0000 Subject: [PATCH 12/32] Refactor --- .../managed_cluster_decorator.py | 184 +++++++++--------- 1 file changed, 92 insertions(+), 92 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 48175c92e1b..1100d31c772 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -5332,98 +5332,6 @@ def immediate_processing_after_request(self, mc: ManagedCluster) -> None: "Could not create a role assignment for subnet. Are you an Owner on this subscription?" ) - def _should_create_dcra(self) -> bool: - """Return True if any flag that triggers a DCRA/DCR create or update was provided.""" - params = self.context.raw_param - return ( - params.get("enable_addons") is not None or - params.get("enable_azure_monitor_logs") is not None or - self._is_cnl_or_hlsm_changing() - ) - - def _is_cnl_or_hlsm_changing(self) -> bool: - """Return True if any CNL or High Log Scale Mode flag was provided.""" - params = self.context.raw_param - return ( - params.get("enable_container_network_logs") is not None or - params.get("enable_retina_flow_logs") is not None or - params.get("disable_container_network_logs") is not None or - params.get("disable_retina_flow_logs") is not None or - params.get("enable_high_log_scale_mode") is not None - ) - - def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: - """Handle monitoring addon postprocessing for the enable case.""" - enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() - if not enable_msi_auth_for_monitoring: - # add cluster spn/msi Monitoring Metrics Publisher role assignment to publish metrics to MDM - # mdm metrics is supported only in azure public cloud, so add the role assignment only in this cloud - cloud_name = self.cmd.cli_ctx.cloud.name - if cloud_name.lower() == "azurecloud": - cluster_resource_id = resource_id( - subscription=self.context.get_subscription_id(), - resource_group=self.context.get_resource_group_name(), - namespace="Microsoft.ContainerService", - type="managedClusters", - name=self.context.get_name(), - ) - self.context.external_functions.add_monitoring_role_assignment( - cluster, cluster_resource_id, self.cmd - ) - elif self._should_create_dcra(): - addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key(cluster, addon_consts) - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - cluster.addon_profiles[monitoring_addon_key], - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=False, - aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=self._is_cnl_or_hlsm_changing(), - create_dcra=True, - enable_syslog=self.context.get_enable_syslog(), - data_collection_settings=self.context.get_data_collection_settings(), - is_private_cluster=self.context.get_enable_private_cluster(), - ampls_resource_id=self.context.get_ampls_resource_id(), - enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), - ) - - def _postprocess_monitoring_disable(self) -> None: - """Handle monitoring addon postprocessing for the disable case.""" - addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - - # Get the current cluster state to check config before it was disabled - current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) - - if (current_cluster.addon_profiles and - CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles): - - addon_profile = current_cluster.addon_profiles[CONST_MONITORING_ADDON_NAME] - - try: - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - addon_profile, - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=True, - aad_route=True, - create_dcr=False, - create_dcra=True, - enable_syslog=False, - data_collection_settings=None, - ampls_resource_id=None, - enable_high_log_scale_mode=False - ) - except TypeError: - pass - # pylint: disable=too-many-locals,too-many-branches def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: """Postprocessing performed after the cluster is created. @@ -5611,6 +5519,98 @@ def put_mc(self, mc: ManagedCluster) -> ManagedCluster: ) return cluster + def _should_create_dcra(self) -> bool: + """Return True if any flag that triggers a DCRA/DCR create or update was provided.""" + params = self.context.raw_param + return ( + params.get("enable_addons") is not None or + params.get("enable_azure_monitor_logs") is not None or + self._is_cnl_or_hlsm_changing() + ) + + def _is_cnl_or_hlsm_changing(self) -> bool: + """Return True if any CNL or High Log Scale Mode flag was provided.""" + params = self.context.raw_param + return ( + params.get("enable_container_network_logs") is not None or + params.get("enable_retina_flow_logs") is not None or + params.get("disable_container_network_logs") is not None or + params.get("disable_retina_flow_logs") is not None or + params.get("enable_high_log_scale_mode") is not None + ) + + def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: + """Handle monitoring addon postprocessing for the enable case.""" + enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() + if not enable_msi_auth_for_monitoring: + # add cluster spn/msi Monitoring Metrics Publisher role assignment to publish metrics to MDM + # mdm metrics is supported only in azure public cloud, so add the role assignment only in this cloud + cloud_name = self.cmd.cli_ctx.cloud.name + if cloud_name.lower() == "azurecloud": + cluster_resource_id = resource_id( + subscription=self.context.get_subscription_id(), + resource_group=self.context.get_resource_group_name(), + namespace="Microsoft.ContainerService", + type="managedClusters", + name=self.context.get_name(), + ) + self.context.external_functions.add_monitoring_role_assignment( + cluster, cluster_resource_id, self.cmd + ) + elif self._should_create_dcra(): + addon_consts = self.context.get_addon_consts() + monitoring_addon_key = _get_monitoring_addon_key(cluster, addon_consts) + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + cluster.addon_profiles[monitoring_addon_key], + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=False, + aad_route=self.context.get_enable_msi_auth_for_monitoring(), + create_dcr=self._is_cnl_or_hlsm_changing(), + create_dcra=True, + enable_syslog=self.context.get_enable_syslog(), + data_collection_settings=self.context.get_data_collection_settings(), + is_private_cluster=self.context.get_enable_private_cluster(), + ampls_resource_id=self.context.get_ampls_resource_id(), + enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), + ) + + def _postprocess_monitoring_disable(self) -> None: + """Handle monitoring addon postprocessing for the disable case.""" + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + + # Get the current cluster state to check config before it was disabled + current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) + + if (current_cluster.addon_profiles and + CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles): + + addon_profile = current_cluster.addon_profiles[CONST_MONITORING_ADDON_NAME] + + try: + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + addon_profile, + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=True, + aad_route=True, + create_dcr=False, + create_dcra=True, + enable_syslog=False, + data_collection_settings=None, + ampls_resource_id=None, + enable_high_log_scale_mode=False + ) + except TypeError: + pass + class AKSPreviewManagedClusterUpdateDecorator(AKSManagedClusterUpdateDecorator): def __init__( From 9c429a05898944574889cfb31f86bc25414553ad Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 18 Mar 2026 19:09:30 +0000 Subject: [PATCH 13/32] Add unit and integration tests --- .../managed_cluster_decorator.py | 49 +- .../tests/latest/test_aks_commands.py | 439 ++++++++++++++++++ .../latest/test_managed_cluster_decorator.py | 31 +- 3 files changed, 497 insertions(+), 22 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 1100d31c772..cd835a56ca2 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -1072,8 +1072,12 @@ def get_container_network_logs(self, mc: ManagedCluster) -> Union[bool, None]: ) monitoring_already_enabled = ( mc.addon_profiles and - mc.addon_profiles.get("omsagent") and - mc.addon_profiles["omsagent"].enabled + ( + (mc.addon_profiles.get("omsagent") and + mc.addon_profiles["omsagent"].enabled) or + (mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) and + mc.addon_profiles[CONST_MONITORING_ADDON_NAME_CAMELCASE].enabled) + ) ) monitoring_enabled = monitoring_being_enabled or monitoring_already_enabled if not acns_enabled or not monitoring_enabled: @@ -4695,16 +4699,27 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: } mc.addon_profiles[CONST_MONITORING_ADDON_NAME] = addon_profile - self.context.set_intermediate("monitoring_addon_enabled", True, overwrite_exists=True) - - # Call ensure_container_insights_for_monitoring with all parameters (similar to postprocessing) - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - if (mc.addon_profiles and - CONST_MONITORING_ADDON_NAME in mc.addon_profiles and - mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled): + # Create DCR before the cluster is created (matching base class build_monitoring_addon_profile pattern). + # The DCRA will be created later in postprocessing_after_mc_created. + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + addon_profile, + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=False, + aad_route=self.context.get_enable_msi_auth_for_monitoring(), + create_dcr=True, + create_dcra=False, + enable_syslog=self.context.get_enable_syslog(), + data_collection_settings=self.context.get_data_collection_settings(), + is_private_cluster=self.context.get_enable_private_cluster(), + ampls_resource_id=self.context.get_ampls_resource_id(), + enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), + ) - # Set intermediate value to trigger postprocessing - self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) + self.context.set_intermediate("monitoring_addon_enabled", True, overwrite_exists=True) def _setup_opentelemetry_metrics(self, mc: ManagedCluster) -> None: """Set up OpenTelemetry metrics configuration.""" @@ -7882,13 +7897,6 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: mc.addon_profiles[existing_key] = addon_profile self.context.set_intermediate("monitoring_addon_enabled", True, overwrite_exists=True) - # Call ensure_container_insights_for_monitoring with all parameters (similar to postprocessing) - if (mc.addon_profiles and - existing_key in mc.addon_profiles and - mc.addon_profiles[existing_key].enabled): - - # Set intermediate value to trigger postprocessing - self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) def _disable_azure_monitor_logs(self, mc: ManagedCluster) -> None: """Disable Azure Monitor logs configuration.""" @@ -7972,8 +7980,9 @@ def _disable_azure_monitor_logs(self, mc: ManagedCluster) -> None: # Now disable the addon and clear configuration mc.addon_profiles[addon_key].enabled = False - # Clear the config to remove old workspace resource ID and other settings - mc.addon_profiles[addon_key].config = None + # Use empty dict instead of None - setting config=None causes the SDK serializer + # to omit the config key entirely, which Azure interprets as "keep existing config" + mc.addon_profiles[addon_key].config = {} # Also disable OpenTelemetry logs when disabling Azure Monitor logs if opentelemetry_logs_enabled: diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index cfa13cb033e..70ad3b4dd5b 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19322,6 +19322,445 @@ def test_aks_create_acns_with_flow_logs( checks=[self.is_empty()], ) + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="eastus2euap", + ) + def test_aks_create_with_azuremonitorlogs_and_cnl( + self, resource_group, resource_group_location + ): + """Test that --enable-azure-monitor-logs with --enable-container-network-logs creates DCR/DCRA correctly. + + This covers the scenario where monitoring is enabled via --enable-azure-monitor-logs (not --enable-addons monitoring) + combined with --enable-container-network-logs. The DCRA postprocessing must detect enable_azure_monitor_logs + to trigger ensure_container_insights_for_monitoring and create the DCR with ContainerNetworkLogs stream. + """ + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "ssh_key_value": self.generate_ssh_keys(), + "location": resource_group_location, + } + ) + + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-count=1 --tier standard " + "--network-plugin azure --network-dataplane=cilium --network-plugin-mode overlay " + "--enable-acns --enable-container-network-logs " + "--enable-azure-monitor-logs --enable-high-log-scale-mode " + "--aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/AdvancedNetworkingFlowLogsPreview " + ) + + response = self.cmd( + create_cmd, + checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.useAADAuth", "true"), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "True"), + ], + ).get_output_in_json() + + cluster_resource_id = response["id"] + subscription = cluster_resource_id.split("/")[2] + + # Verify DCR was created with ContainerNetworkLogs stream + location = resource_group_location + dataCollectionRuleName = f"MSCI-{location}-{aks_name}" + dataCollectionRuleName = dataCollectionRuleName[0:64] + dcr_resource_id = f"/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}" + + get_cmd = f'rest --method get --url https://management.azure.com{dcr_resource_id}?api-version=2022-06-01' + self.cmd(get_cmd, checks=[ + self.check('properties.dataFlows[0].streams[-1]', 'Microsoft-ContainerNetworkLogs'), + ]) + + # Verify DCRA was created + dcra_resource_id = f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension" + get_cmd = f'rest --method get --url https://management.azure.com{dcra_resource_id}?api-version=2022-06-01' + self.cmd(get_cmd, checks=[ + self.check('properties.dataCollectionRuleId', f'{dcr_resource_id}') + ]) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="westus2", + ) + def test_aks_update_enable_azuremonitorlogs_with_hlsm( + self, resource_group, resource_group_location + ): + """Test that --enable-azure-monitor-logs with --enable-high-log-scale-mode on update creates DCR with HighScale stream. + + Creates a plain cluster, then updates it with --enable-azure-monitor-logs --enable-high-log-scale-mode, + and verifies that the DCR is created with the Microsoft-ContainerLogV2-HighScale stream. + """ + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + node_vm_size = "standard_d2s_v3" + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "location": resource_group_location, + "ssh_key_value": self.generate_ssh_keys(), + "node_vm_size": node_vm_size, + } + ) + + # Create a plain cluster without monitoring + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} " + "--enable-managed-identity --output=json" + ) + self.cmd(create_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + ]) + + # Update: enable monitoring with high log scale mode + update_cmd = ( + "aks update --resource-group={resource_group} --name={name} --yes " + "--enable-azure-monitor-logs --enable-high-log-scale-mode --output=json" + ) + self.cmd(update_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.useAADAuth", "true"), + ]) + + # Verify aks show reflects the update + show_cmd = "aks show --resource-group={resource_group} --name={name} --output=json" + response = self.cmd(show_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + ]).get_output_in_json() + + cluster_resource_id = response["id"] + subscription = cluster_resource_id.split("/")[2] + + # Verify DCR was created with HighScale stream + location = resource_group_location + dataCollectionRuleName = f"MSCI-{location}-{aks_name}" + dataCollectionRuleName = dataCollectionRuleName[0:64] + dcr_resource_id = f"/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}" + + get_cmd = f'rest --method get --url https://management.azure.com{dcr_resource_id}?api-version=2022-06-01' + self.cmd(get_cmd, checks=[ + self.check('properties.dataFlows[0].streams[1]', 'Microsoft-ContainerLogV2-HighScale'), + ]) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="eastus2euap", + ) + def test_aks_create_with_retina_flow_logs_alias( + self, resource_group, resource_group_location + ): + """Test that --enable-retina-flow-logs works as an alias for --enable-container-network-logs. + + This verifies the alias parameter path: enable_retina_flow_logs is treated identically to + enable_container_network_logs in get_container_network_logs() and _is_cnl_or_hlsm_changing(). + """ + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "ssh_key_value": self.generate_ssh_keys(), + "location": resource_group_location, + } + ) + + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-count=1 --tier standard " + "--network-plugin azure --network-dataplane=cilium --network-plugin-mode overlay " + "--enable-acns --enable-retina-flow-logs " + "--enable-addons monitoring --enable-high-log-scale-mode " + "--aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/AdvancedNetworkingFlowLogsPreview " + ) + + response = self.cmd( + create_cmd, + checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "True"), + ], + ).get_output_in_json() + + cluster_resource_id = response["id"] + subscription = cluster_resource_id.split("/")[2] + + # Verify DCR was created with ContainerNetworkLogs stream + location = resource_group_location + dataCollectionRuleName = f"MSCI-{location}-{aks_name}" + dataCollectionRuleName = dataCollectionRuleName[0:64] + dcr_resource_id = f"/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}" + + get_cmd = f'rest --method get --url https://management.azure.com{dcr_resource_id}?api-version=2022-06-01' + self.cmd(get_cmd, checks=[ + self.check('properties.dataFlows[0].streams[-1]', 'Microsoft-ContainerNetworkLogs'), + ]) + + # Disable via the alias — run twice like test_aks_create_acns_with_flow_logs + # (first run applies the change, second run verifies the result) + disable_cmd = "aks update --resource-group={resource_group} --name={name} --disable-retina-flow-logs -o json" + self.cmd(disable_cmd, checks=[self.check("provisioningState", "Succeeded")]) + self.cmd( + disable_cmd, + checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "False"), + ], + ) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="eastus2euap", + ) + def test_aks_update_enable_cnl_via_azuremonitorlogs( + self, resource_group, resource_group_location + ): + """Test enabling CNL on an existing ACNS cluster by adding --enable-azure-monitor-logs on update. + + Creates an ACNS cluster without monitoring, then updates with --enable-azure-monitor-logs + --enable-container-network-logs to cover the update decorator path for enabling both + monitoring and CNL simultaneously. + """ + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "ssh_key_value": self.generate_ssh_keys(), + "location": resource_group_location, + } + ) + + # Create an ACNS cluster without monitoring + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-count=1 --tier standard " + "--network-plugin azure --network-dataplane=cilium --network-plugin-mode overlay " + "--enable-acns --enable-managed-identity --output=json " + ) + self.cmd(create_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("networkProfile.advancedNetworking.enabled", True), + ]) + + # Update: enable monitoring + CNL together + update_cmd = ( + "aks update --resource-group={resource_group} --name={name} --yes " + "--enable-azure-monitor-logs --enable-container-network-logs --enable-high-log-scale-mode " + "--aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/AdvancedNetworkingFlowLogsPreview " + "--output=json" + ) + self.cmd(update_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "True"), + ]) + + # Verify DCR was created with ContainerNetworkLogs stream + show_cmd = "aks show --resource-group={resource_group} --name={name} --output=json" + response = self.cmd(show_cmd).get_output_in_json() + + cluster_resource_id = response["id"] + subscription = cluster_resource_id.split("/")[2] + + location = resource_group_location + dataCollectionRuleName = f"MSCI-{location}-{aks_name}" + dataCollectionRuleName = dataCollectionRuleName[0:64] + dcr_resource_id = f"/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}" + + get_cmd = f'rest --method get --url https://management.azure.com{dcr_resource_id}?api-version=2022-06-01' + self.cmd(get_cmd, checks=[ + self.check('properties.dataFlows[0].streams[-1]', 'Microsoft-ContainerNetworkLogs'), + ]) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="westus2", + ) + def test_aks_update_disable_azuremonitorlogs( + self, resource_group, resource_group_location + ): + """Test disabling Azure Monitor Logs via --disable-azure-monitor-logs on update. + + Creates a cluster with --enable-azure-monitor-logs, then disables monitoring + with --disable-azure-monitor-logs. Verifies the _disable_azure_monitor_logs code path + which performs DCR/DCRA cleanup before disabling the addon. + """ + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "ssh_key_value": self.generate_ssh_keys(), + "location": resource_group_location, + } + ) + + # Create cluster with Azure Monitor Logs enabled + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-count=1 " + "--enable-azure-monitor-logs --enable-managed-identity --output=json" + ) + self.cmd(create_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + ]) + + # Disable Azure Monitor Logs + disable_cmd = ( + "aks update --resource-group={resource_group} --name={name} --yes " + "--disable-azure-monitor-logs --output=json" + ) + self.cmd(disable_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", False), + ]) + + # Verify aks show confirms monitoring is disabled + show_cmd = "aks show --resource-group={resource_group} --name={name} --output=json" + self.cmd(show_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", False), + ]) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="westus2", + ) + def test_aks_update_standalone_enable_high_log_scale_mode( + self, resource_group, resource_group_location + ): + """Test standalone --enable-high-log-scale-mode on update path. + + Creates a cluster with monitoring enabled, then updates with just + --enable-high-log-scale-mode to verify the DCR is updated with + the Microsoft-ContainerLogV2-HighScale stream. + """ + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "ssh_key_value": self.generate_ssh_keys(), + "location": resource_group_location, + } + ) + + # Create cluster with Azure Monitor Logs enabled + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-count=1 " + "--enable-azure-monitor-logs --enable-managed-identity --output=json" + ) + self.cmd(create_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + self.check("addonProfiles.omsagent.enabled", True), + ]) + + # Update: enable high log scale mode standalone + update_cmd = ( + "aks update --resource-group={resource_group} --name={name} --yes " + "--enable-high-log-scale-mode --output=json" + ) + response = self.cmd(update_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + ]).get_output_in_json() + + cluster_resource_id = response["id"] + subscription = cluster_resource_id.split("/")[2] + location = resource_group_location + dataCollectionRuleName = f"MSCI-{location}-{aks_name}" + dataCollectionRuleName = dataCollectionRuleName[0:64] + dcr_resource_id = ( + f"/subscriptions/{subscription}/resourceGroups/{resource_group}" + f"/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}" + ) + + # Verify DCR contains the HighScale stream + get_cmd = f'rest --method get --url https://management.azure.com{dcr_resource_id}?api-version=2022-06-01' + self.cmd(get_cmd, checks=[ + self.check('properties.dataFlows[0].streams[-1]', 'Microsoft-ContainerLogV2-HighScale'), + ]) + + # Now disable high log scale mode + disable_cmd = ( + "aks update --resource-group={resource_group} --name={name} --yes " + "--enable-high-log-scale-mode false --output=json" + ) + self.cmd(disable_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + ]) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + @AllowLargeResponse() @AKSCustomResourceGroupPreparer( random_name_length=17, diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index 887eb77484d..bcaccdba108 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -5160,6 +5160,33 @@ def test_get_container_network_logs_with_monitoring_already_on_mc(self): result = ctx.get_container_network_logs(mc) self.assertTrue(result) + def test_get_container_network_logs_with_monitoring_camelcase_key_on_mc(self): + """Test get_container_network_logs succeeds when monitoring uses omsAgent (camelCase) key on mc.""" + ctx = AKSPreviewManagedClusterContext( + self.cmd, + AKSManagedClusterParamDict({ + "enable_container_network_logs": True, + }), + self.models, + decorator_mode=DecoratorMode.UPDATE, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking( + enabled=True, + ), + ), + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + ) + }, + ) + ctx.attach_mc(mc) + result = ctx.get_container_network_logs(mc) + self.assertTrue(result) + def test_get_container_network_logs_error_without_acns(self): """Test get_container_network_logs raises error when ACNS is not enabled.""" ctx = AKSPreviewManagedClusterContext( @@ -11595,7 +11622,7 @@ def test_disable_azure_monitor_logs_with_omsagent_camelcase(self): # Verify: omsAgent should be disabled self.assertIn("omsAgent", mc_1.addon_profiles) self.assertFalse(mc_1.addon_profiles["omsAgent"].enabled) - self.assertIsNone(mc_1.addon_profiles["omsAgent"].config) + self.assertEqual(mc_1.addon_profiles["omsAgent"].config, {}) def test_disable_azure_monitor_logs_with_omsagent_lowercase(self): # Test that _disable_azure_monitor_logs handles omsagent (lowercase) correctly @@ -11631,7 +11658,7 @@ def test_disable_azure_monitor_logs_with_omsagent_lowercase(self): # Verify: omsagent should be disabled self.assertIn("omsagent", mc_1.addon_profiles) self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) - self.assertIsNone(mc_1.addon_profiles["omsagent"].config) + self.assertEqual(mc_1.addon_profiles["omsagent"].config, {}) def test_get_enable_opentelemetry_logs_validation_with_omsagent_camelcase(self): # Test that OpenTelemetry logs validation recognizes omsAgent (camelCase) as enabled From 66b9b20e7c5f344b1721f108389df5a90e86ab19 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 19 Mar 2026 12:16:19 +0000 Subject: [PATCH 14/32] Fix integration tests --- .../managed_cluster_decorator.py | 80 ++++++++++++------- .../tests/latest/test_aks_commands.py | 2 +- .../latest/test_managed_cluster_decorator.py | 4 +- 3 files changed, 54 insertions(+), 32 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index cd835a56ca2..609ded23580 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -5882,6 +5882,11 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus """ self._ensure_mc(mc) + # Call the base class implementation if it exists (CLI >= 2.84.0 added this method to the base). + # This ensures any base-class monitoring handling (e.g., enable/disable) is applied first. + if hasattr(super(), 'update_monitoring_profile_flow_logs'): + mc = super().update_monitoring_profile_flow_logs(mc) + # Trigger validation for high log scale mode when container network logs are enabled. # This ensures proper error messages are raised before cluster update if the user # explicitly disables high log scale mode while enabling container network logs. @@ -5918,35 +5923,45 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus # monitoring with MSI auth is already enabled, then trigger the DCR update via postprocessing. enable_high_log_scale_mode = self.context.raw_param.get("enable_high_log_scale_mode") if enable_high_log_scale_mode is True: - addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") + # Check if monitoring is being enabled in the same command + enable_azure_monitor_logs = self.context.raw_param.get("enable_azure_monitor_logs") + enable_addons = self.context.raw_param.get("enable_addons") + monitoring_being_enabled = ( + enable_azure_monitor_logs or + (enable_addons and "monitoring" in enable_addons) + ) - # Resolve the addon profile, handling both "omsagent" and "omsAgent" key variants. - monitoring_addon_profile = None - if mc.addon_profiles: - monitoring_addon_profile = ( - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - ) + if not monitoring_being_enabled: + # Only validate existing addon state when not enabling monitoring simultaneously + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") + + # Resolve the addon profile, handling both "omsagent" and "omsAgent" key variants. + monitoring_addon_profile = None + if mc.addon_profiles: + monitoring_addon_profile = ( + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or + mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) + ) - if not monitoring_addon_profile or not monitoring_addon_profile.enabled: - raise RequiredArgumentMissingError( - "--enable-high-log-scale-mode requires the Azure Monitor logs addon (omsagent) " - "to be enabled on the cluster. Please enable it first with " - "--enable-addons monitoring or --enable-azure-monitor-logs." - ) + if not monitoring_addon_profile or not monitoring_addon_profile.enabled: + raise RequiredArgumentMissingError( + "--enable-high-log-scale-mode requires the Azure Monitor logs addon (omsagent) " + "to be enabled on the cluster. Please enable it first with " + "--enable-addons monitoring or --enable-azure-monitor-logs." + ) - addon_config = monitoring_addon_profile.config or {} - msi_auth_enabled = ( - CONST_MONITORING_USING_AAD_MSI_AUTH in addon_config and - str(addon_config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == "true" - ) - if not msi_auth_enabled: - raise RequiredArgumentMissingError( - "--enable-high-log-scale-mode requires MSI authentication to be enabled " - "for the monitoring addon. Please enable it with --enable-msi-auth-for-monitoring." + addon_config = monitoring_addon_profile.config or {} + msi_auth_enabled = ( + CONST_MONITORING_USING_AAD_MSI_AUTH in addon_config and + str(addon_config[CONST_MONITORING_USING_AAD_MSI_AUTH]).lower() == "true" ) + if not msi_auth_enabled: + raise RequiredArgumentMissingError( + "--enable-high-log-scale-mode requires MSI authentication to be enabled " + "for the monitoring addon. Please enable it with --enable-msi-auth-for-monitoring." + ) self.context.set_intermediate("monitoring_addon_postprocessing_required", True, overwrite_exists=True) @@ -7979,10 +7994,14 @@ def _disable_azure_monitor_logs(self, mc: ManagedCluster) -> None: # Now disable the addon and clear configuration mc.addon_profiles[addon_key].enabled = False + mc.addon_profiles[addon_key].config = None - # Use empty dict instead of None - setting config=None causes the SDK serializer - # to omit the config key entirely, which Azure interprets as "keep existing config" - mc.addon_profiles[addon_key].config = {} + # Also disable azureMonitorProfile.containerInsights (the new API surface) + # The RP uses containerInsights.enabled as the source of truth; if it remains + # true while the legacy addon is disabled, the RP re-enables the addon. + if (mc.azure_monitor_profile and + mc.azure_monitor_profile.container_insights): + mc.azure_monitor_profile.container_insights.enabled = False # Also disable OpenTelemetry logs when disabling Azure Monitor logs if opentelemetry_logs_enabled: @@ -8121,7 +8140,10 @@ def update_mc_profile_preview(self) -> ManagedCluster: # update acns in network_profile mc = self.update_acns_in_network_profile(mc) # update update_monitoring_profile_flow_logs - mc = self.update_monitoring_profile_flow_logs(mc) + # Only call here if the base class doesn't already call it in update_mc_profile_default + # (CLI >= 2.84.0 added this call to the base class) + if not hasattr(super(), 'update_monitoring_profile_flow_logs'): + mc = self.update_monitoring_profile_flow_logs(mc) # update kubernetes support plan mc = self.update_k8s_support_plan(mc) # update AI toolchain operator diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index 70ad3b4dd5b..1a20a38d3aa 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19743,7 +19743,7 @@ def test_aks_update_standalone_enable_high_log_scale_mode( # Verify DCR contains the HighScale stream get_cmd = f'rest --method get --url https://management.azure.com{dcr_resource_id}?api-version=2022-06-01' self.cmd(get_cmd, checks=[ - self.check('properties.dataFlows[0].streams[-1]', 'Microsoft-ContainerLogV2-HighScale'), + self.check("contains(properties.dataFlows[0].streams, 'Microsoft-ContainerLogV2-HighScale')", True), ]) # Now disable high log scale mode diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index bcaccdba108..e14a0fe21d9 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -11622,7 +11622,7 @@ def test_disable_azure_monitor_logs_with_omsagent_camelcase(self): # Verify: omsAgent should be disabled self.assertIn("omsAgent", mc_1.addon_profiles) self.assertFalse(mc_1.addon_profiles["omsAgent"].enabled) - self.assertEqual(mc_1.addon_profiles["omsAgent"].config, {}) + self.assertIsNone(mc_1.addon_profiles["omsAgent"].config) def test_disable_azure_monitor_logs_with_omsagent_lowercase(self): # Test that _disable_azure_monitor_logs handles omsagent (lowercase) correctly @@ -11658,7 +11658,7 @@ def test_disable_azure_monitor_logs_with_omsagent_lowercase(self): # Verify: omsagent should be disabled self.assertIn("omsagent", mc_1.addon_profiles) self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) - self.assertEqual(mc_1.addon_profiles["omsagent"].config, {}) + self.assertIsNone(mc_1.addon_profiles["omsagent"].config) def test_get_enable_opentelemetry_logs_validation_with_omsagent_camelcase(self): # Test that OpenTelemetry logs validation recognizes omsAgent (camelCase) as enabled From ea1937d5837b28b9addb55f06ef3acdf4d74a3f5 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 19 Mar 2026 13:49:01 +0000 Subject: [PATCH 15/32] Fix tests + lint --- src/aks-preview/azext_aks_preview/custom.py | 8 ++- .../managed_cluster_decorator.py | 49 +++++++++++++++---- src/aks-preview/linter_exclusions.yml | 36 ++++++++++++++ 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index cf7c7197235..3a8e3eb2c1c 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -552,7 +552,7 @@ def __init__(self, location, resource_id): ) resources = get_resources_client(cmd.cli_ctx, cluster_subscription) - for _ in range(3): + for attempt in range(3): try: if enable_syslog: resources.begin_create_or_update_by_id( @@ -568,8 +568,12 @@ def __init__(self, location, resource_id): ) error = None break - except CLIError as e: + except (CLIError, HttpResponseError) as e: error = e + # Wait before retry to allow workspace tables to become available + if attempt < 2: + import time + time.sleep(30) else: raise error diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 609ded23580..88e1c0a40c3 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -7,6 +7,7 @@ import copy import datetime import os +import time from types import SimpleNamespace from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union @@ -56,6 +57,7 @@ CONST_TRANSIT_ENCRYPTION_TYPE_MTLS, CONST_ADVANCED_NETWORKPOLICIES_L7, ) +from azure.core.exceptions import HttpResponseError, ResourceExistsError from azext_aks_preview.azurecontainerstorage._consts import ( CONST_ACSTOR_EXT_INSTALLATION_NAME, CONST_ACSTOR_V1_EXT_INSTALLATION_NAME, @@ -4670,11 +4672,21 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if not workspace_resource_id: ensure_workspace_func = ( self.context.external_functions.ensure_default_log_analytics_workspace_for_monitoring) - workspace_resource_id = ensure_workspace_func( - self.cmd, - self.context.get_subscription_id(), - self.context.get_resource_group_name() - ) + # Retry with backoff to handle 409 Conflict when workspace is still provisioning + # (common when parallel tests or commands target the same default workspace) + for attempt in range(3): + try: + workspace_resource_id = ensure_workspace_func( + self.cmd, + self.context.get_subscription_id(), + self.context.get_resource_group_name() + ) + break + except (HttpResponseError, ResourceExistsError): + if attempt < 2: + time.sleep(30) + else: + raise # Sanitize and configure sanitize_func = self.context.external_functions.sanitize_loganalytics_ws_resource_id @@ -7868,11 +7880,21 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if not workspace_resource_id: ensure_workspace_func = ( self.context.external_functions.ensure_default_log_analytics_workspace_for_monitoring) - workspace_resource_id = ensure_workspace_func( - self.cmd, - self.context.get_subscription_id(), - self.context.get_resource_group_name() - ) + # Retry with backoff to handle 409 Conflict when workspace is still provisioning + # (common when parallel tests or commands target the same default workspace) + for attempt in range(3): + try: + workspace_resource_id = ensure_workspace_func( + self.cmd, + self.context.get_subscription_id(), + self.context.get_resource_group_name() + ) + break + except (HttpResponseError, ResourceExistsError): + if attempt < 2: + time.sleep(30) + else: + raise # Sanitize and configure sanitize_func = self.context.external_functions.sanitize_loganalytics_ws_resource_id @@ -7907,6 +7929,13 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: CONST_MONITORING_USING_AAD_MSI_AUTH: enable_msi_auth } + # Also set enableRetinaNetworkFlags if container network logs are being enabled + # in the same command. This must be done here because update_monitoring_profile_flow_logs + # may run before update_addon_profiles when the base class calls it first. + container_network_logs_enabled = self.context.get_container_network_logs(mc) + if container_network_logs_enabled is not None: + new_config["enableRetinaNetworkFlags"] = str(container_network_logs_enabled) + # Replace the entire config, not just individual keys addon_profile.config = new_config diff --git a/src/aks-preview/linter_exclusions.yml b/src/aks-preview/linter_exclusions.yml index 17c229ab774..aebdb64dd8d 100644 --- a/src/aks-preview/linter_exclusions.yml +++ b/src/aks-preview/linter_exclusions.yml @@ -1,5 +1,41 @@ +aks addon: + rule_exclusions: + - require_wait_command_if_no_wait +aks extension: + rule_exclusions: + - require_wait_command_if_no_wait +aks jwtauthenticator: + rule_exclusions: + - require_wait_command_if_no_wait +aks machine: + rule_exclusions: + - require_wait_command_if_no_wait +aks mesh upgrade: + rule_exclusions: + - require_wait_command_if_no_wait +aks mesh: + rule_exclusions: + - require_wait_command_if_no_wait +aks namespace: + rule_exclusions: + - require_wait_command_if_no_wait +aks nodepool manual-scale: + rule_exclusions: + - require_wait_command_if_no_wait +aks nodepool snapshot: + rule_exclusions: + - require_wait_command_if_no_wait +aks nodepool: + rule_exclusions: + - require_wait_command_if_no_wait +aks snapshot: + rule_exclusions: + - require_wait_command_if_no_wait aks create: parameters: + node_resource_group: + rule_exclusions: + - parameter_should_not_end_in_resource_group enable_sgxquotehelper: rule_exclusions: - option_length_too_long From ce5edfc8b6cee3d8d13de08c326fd0cc5871e5dc Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Thu, 19 Mar 2026 14:19:05 +0000 Subject: [PATCH 16/32] Fix linting --- src/aks-preview/azext_aks_preview/custom.py | 1 - src/aks-preview/azext_aks_preview/managed_cluster_decorator.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index 3a8e3eb2c1c..b2aef38e18e 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -572,7 +572,6 @@ def __init__(self, location, resource_id): error = e # Wait before retry to allow workspace tables to become available if attempt < 2: - import time time.sleep(30) else: raise error diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 88e1c0a40c3..966e157676a 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -57,7 +57,6 @@ CONST_TRANSIT_ENCRYPTION_TYPE_MTLS, CONST_ADVANCED_NETWORKPOLICIES_L7, ) -from azure.core.exceptions import HttpResponseError, ResourceExistsError from azext_aks_preview.azurecontainerstorage._consts import ( CONST_ACSTOR_EXT_INSTALLATION_NAME, CONST_ACSTOR_V1_EXT_INSTALLATION_NAME, @@ -102,6 +101,7 @@ from azext_aks_preview.custom import ( ensure_container_insights_for_monitoring_preview, ) +from azure.core.exceptions import HttpResponseError, ResourceExistsError from azure.cli.command_modules.acs._client_factory import get_graph_client from azure.cli.command_modules.acs._consts import ( CONST_OUTBOUND_TYPE_LOAD_BALANCER, From 767e9bb33a41f6524ce036ba44454ad4d82abb26 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Fri, 20 Mar 2026 09:37:46 +0000 Subject: [PATCH 17/32] Fix history --- src/aks-preview/HISTORY.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/aks-preview/HISTORY.rst b/src/aks-preview/HISTORY.rst index 7fda2a870ce..062f9107c50 100644 --- a/src/aks-preview/HISTORY.rst +++ b/src/aks-preview/HISTORY.rst @@ -45,6 +45,11 @@ Pending * `az aks namespace update`: Fix location should use existing namespace location. * `az aks nodepool update`: Add `--disable-artifact-streaming` to disable artifact streaming. +19.0.0b28 ++++++++ +* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. +* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled. + 19.0.0b27 +++++++ * `az aks nodepool add`: Fix `InvalidParameter` error when `mode` is `Machines`. @@ -55,11 +60,6 @@ Pending * `az aks approuting gateway istio enable/disable`: Add new subcommands to enable or disable the Istio Gateway API implementation for App Routing on an existing cluster. * Add 'mTLS' as a transit encryption type option for `--acns-transit-encryption-type` in `az aks create/update` -19.0.0b26 -+++++++ -* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. -* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled. - 19.0.0b25 +++++++ * `az aks create`: Add `--enable-continuous-control-plane-and-addon-monitor` to enable continuous control plane and addon monitor. From cdf322bd7d16f3a05a6b9cb59b6791a8e9351c38 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 23 Mar 2026 14:58:54 +0000 Subject: [PATCH 18/32] PR comments + tests --- .../managed_cluster_decorator.py | 68 +-- .../tests/latest/test_aks_commands.py | 47 ++ .../latest/test_managed_cluster_decorator.py | 578 +++++++++++++++++- 3 files changed, 642 insertions(+), 51 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 966e157676a..063a0ec2fcc 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -7,6 +7,7 @@ import copy import datetime import os +import random import time from types import SimpleNamespace from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union @@ -4672,9 +4673,12 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if not workspace_resource_id: ensure_workspace_func = ( self.context.external_functions.ensure_default_log_analytics_workspace_for_monitoring) - # Retry with backoff to handle 409 Conflict when workspace is still provisioning - # (common when parallel tests or commands target the same default workspace) - for attempt in range(3): + # Retry with exponential backoff + jitter to handle 409 Conflict when + # workspace is still provisioning (common when parallel commands target + # the same default workspace). Delays: ~5 s, ~10 s, ~20 s (worst-case + # total ~52 s; fast path ~7 s if the first retry succeeds). + max_attempts = 4 + for attempt in range(max_attempts): try: workspace_resource_id = ensure_workspace_func( self.cmd, @@ -4683,8 +4687,10 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: ) break except (HttpResponseError, ResourceExistsError): - if attempt < 2: - time.sleep(30) + if attempt < max_attempts - 1: + base_delay = 5 * (2 ** attempt) # 5 s, 10 s, 20 s + jitter = random.uniform(0, base_delay * 0.5) + time.sleep(base_delay + jitter) else: raise @@ -4711,26 +4717,9 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: } mc.addon_profiles[CONST_MONITORING_ADDON_NAME] = addon_profile - # Create DCR before the cluster is created (matching base class build_monitoring_addon_profile pattern). - # The DCRA will be created later in postprocessing_after_mc_created. - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - addon_profile, - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=False, - aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=True, - create_dcra=False, - enable_syslog=self.context.get_enable_syslog(), - data_collection_settings=self.context.get_data_collection_settings(), - is_private_cluster=self.context.get_enable_private_cluster(), - ampls_resource_id=self.context.get_ampls_resource_id(), - enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), - ) - + # DCR and DCRA creation is deferred to postprocessing_after_mc_created + # (_postprocess_monitoring_enable) so that all flags are finalized and + # the cluster exists. Only MSI clusters need a DCR. self.context.set_intermediate("monitoring_addon_enabled", True, overwrite_exists=True) def _setup_opentelemetry_metrics(self, mc: ManagedCluster) -> None: @@ -5596,7 +5585,7 @@ def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: self.context.get_location(), remove_monitoring=False, aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=self._is_cnl_or_hlsm_changing(), + create_dcr=True, create_dcra=True, enable_syslog=self.context.get_enable_syslog(), data_collection_settings=self.context.get_data_collection_settings(), @@ -5910,17 +5899,9 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus if container_network_logs_enabled is not None: if mc.addon_profiles: addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - # Handle both "omsagent" and "omsAgent" key variants - monitoring_addon_profile = ( - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - ) + monitoring_addon_key = _get_monitoring_addon_key(mc, addon_consts) + monitoring_addon_profile = mc.addon_profiles.get(monitoring_addon_key) if monitoring_addon_profile: - monitoring_addon_key = ( - CONST_MONITORING_ADDON_NAME if CONST_MONITORING_ADDON_NAME in mc.addon_profiles - else CONST_MONITORING_ADDON_NAME_CAMELCASE - ) config = monitoring_addon_profile.config or {} config["enableRetinaNetworkFlags"] = str(container_network_logs_enabled) mc.addon_profiles[monitoring_addon_key].config = config @@ -7880,9 +7861,12 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if not workspace_resource_id: ensure_workspace_func = ( self.context.external_functions.ensure_default_log_analytics_workspace_for_monitoring) - # Retry with backoff to handle 409 Conflict when workspace is still provisioning - # (common when parallel tests or commands target the same default workspace) - for attempt in range(3): + # Retry with exponential backoff + jitter to handle 409 Conflict when + # workspace is still provisioning (common when parallel commands target + # the same default workspace). Delays: ~5 s, ~10 s, ~20 s (worst-case + # total ~52 s; fast path ~7 s if the first retry succeeds). + max_attempts = 4 + for attempt in range(max_attempts): try: workspace_resource_id = ensure_workspace_func( self.cmd, @@ -7891,8 +7875,10 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: ) break except (HttpResponseError, ResourceExistsError): - if attempt < 2: - time.sleep(30) + if attempt < max_attempts - 1: + base_delay = 5 * (2 ** attempt) # 5 s, 10 s, 20 s + jitter = random.uniform(0, base_delay * 0.5) + time.sleep(base_delay + jitter) else: raise diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index 1a20a38d3aa..32dd6f6a54f 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19761,6 +19761,53 @@ def test_aks_update_standalone_enable_high_log_scale_mode( checks=[self.is_empty()], ) + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer( + random_name_length=17, + name_prefix="clitest", + location="westus2", + ) + def test_aks_update_disable_hlsm_error_when_cnl_enabled( + self, resource_group, resource_group_location + ): + """Test that disabling --enable-high-log-scale-mode raises an error + when container network logs are already enabled on the cluster.""" + self.test_resources_count = 0 + aks_name = self.create_random_name("cliakstest", 16) + self.kwargs.update( + { + "resource_group": resource_group, + "name": aks_name, + "ssh_key_value": self.generate_ssh_keys(), + "location": resource_group_location, + } + ) + + # Create cluster with monitoring + CNL + HLSM + create_cmd = ( + "aks create --resource-group={resource_group} --name={name} --location={location} " + "--ssh-key-value={ssh_key_value} --node-count=1 " + "--enable-azure-monitor-logs --enable-managed-identity " + "--enable-acns --enable-container-network-logs --output=json" + ) + self.cmd(create_cmd, checks=[ + self.check("provisioningState", "Succeeded"), + ]) + + # Attempt to disable HLSM while CNL is still enabled — should fail + disable_cmd = ( + "aks update --resource-group={resource_group} --name={name} --yes " + "--enable-high-log-scale-mode false --output=json" + ) + with self.assertRaisesRegex(Exception, "container network logs"): + self.cmd(disable_cmd) + + # delete + self.cmd( + "aks delete -g {resource_group} -n {name} --yes --no-wait", + checks=[self.is_empty()], + ) + @AllowLargeResponse() @AKSCustomResourceGroupPreparer( random_name_length=17, diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index e14a0fe21d9..c5c8c5d8ecb 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -73,6 +73,7 @@ AKSPreviewManagedClusterCreateDecorator, AKSPreviewManagedClusterModels, AKSPreviewManagedClusterUpdateDecorator, + _get_monitoring_addon_key, ) from azext_aks_preview.tests.latest.utils import get_test_data_file_path from azure.cli.command_modules.acs._consts import ( @@ -134,6 +135,65 @@ def test_models(self): ) +class AKSPreviewGetMonitoringAddonKeyTestCase(unittest.TestCase): + """Tests for the standalone _get_monitoring_addon_key helper function.""" + + def setUp(self): + register_aks_preview_resource_type() + self.cli_ctx = MockCLI() + self.cmd = MockCmd(self.cli_ctx) + self.models = AKSPreviewManagedClusterModels(self.cmd, CUSTOM_MGMT_AKS_PREVIEW) + self.addon_consts = { + "CONST_MONITORING_ADDON_NAME": CONST_MONITORING_ADDON_NAME, + } + + def test_returns_lowercase_key_when_present(self): + cluster = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True), + }, + ) + result = _get_monitoring_addon_key(cluster, self.addon_consts) + self.assertEqual(result, CONST_MONITORING_ADDON_NAME) + + def test_returns_camelcase_key_when_present(self): + cluster = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True), + }, + ) + result = _get_monitoring_addon_key(cluster, self.addon_consts) + self.assertEqual(result, CONST_MONITORING_ADDON_NAME_CAMELCASE) + + def test_prefers_lowercase_when_both_present(self): + cluster = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True), + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True), + }, + ) + result = _get_monitoring_addon_key(cluster, self.addon_consts) + self.assertEqual(result, CONST_MONITORING_ADDON_NAME) + + def test_returns_default_when_no_addon_profiles(self): + cluster = self.models.ManagedCluster(location="test_location") + result = _get_monitoring_addon_key(cluster, self.addon_consts) + self.assertEqual(result, CONST_MONITORING_ADDON_NAME) + + def test_returns_default_when_neither_key_present(self): + cluster = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "some_other_addon": self.models.ManagedClusterAddonProfile(enabled=True), + }, + ) + result = _get_monitoring_addon_key(cluster, self.addon_consts) + self.assertEqual(result, CONST_MONITORING_ADDON_NAME) + + class AKSPreviewManagedClusterContextTestCase(unittest.TestCase): def setUp(self): # manually register CUSTOM_MGMT_AKS_PREVIEW @@ -8311,11 +8371,11 @@ def _make_cluster_with_monitoring(self): }, ) - def test_postprocessing_create_dcr_false_when_only_enable_addons(self): - """create_dcr=False when only enable_addons triggers ensure_container_insights_for_monitoring. + def test_postprocessing_create_dcr_true_when_only_enable_addons(self): + """create_dcr=True when enable_addons triggers ensure_container_insights_for_monitoring. - enable_addons is in the outer condition but NOT in cnl_or_hlsm_changing, - so create_dcr must be False. + DCR creation is now always requested during postprocessing so the + DCR is created alongside the DCRA after the cluster exists. """ dec = self._make_postprocessing_decorator({"enable_addons": "monitoring"}) cluster = self._make_cluster_with_monitoring() @@ -8326,7 +8386,7 @@ def test_postprocessing_create_dcr_false_when_only_enable_addons(self): dec.postprocessing_after_mc_created(cluster) mock_ecifm.assert_called_once() _, kwargs = mock_ecifm.call_args - self.assertFalse(kwargs["create_dcr"]) + self.assertTrue(kwargs["create_dcr"]) def test_postprocessing_ensure_container_insights_not_called_without_relevant_flags(self): """ensure_container_insights_for_monitoring is NOT called when no relevant flags are set. @@ -8493,11 +8553,11 @@ def test_postprocessing_create_dcr_true_with_camelcase_addon_key(self): _, kwargs = mock_ecifm.call_args self.assertTrue(kwargs["create_dcr"]) - def test_postprocessing_create_dcr_false_when_enable_azure_monitor_logs(self): - """create_dcr=False when enable_azure_monitor_logs triggers ensure_container_insights_for_monitoring. + def test_postprocessing_create_dcr_true_when_enable_azure_monitor_logs(self): + """create_dcr=True when enable_azure_monitor_logs triggers ensure_container_insights_for_monitoring. - enable_azure_monitor_logs is in the outer _should_create_dcra condition but NOT in - _is_cnl_or_hlsm_changing, so create_dcr must be False. + DCR creation is now always requested during postprocessing so the + DCR is created alongside the DCRA after the cluster exists. """ dec = self._make_postprocessing_decorator({"enable_azure_monitor_logs": True}) cluster = self._make_cluster_with_monitoring() @@ -8508,8 +8568,156 @@ def test_postprocessing_create_dcr_false_when_enable_azure_monitor_logs(self): dec.postprocessing_after_mc_created(cluster) mock_ecifm.assert_called_once() _, kwargs = mock_ecifm.call_args - self.assertFalse(kwargs["create_dcr"]) + self.assertTrue(kwargs["create_dcr"]) + + # ------------------------------------------------------------------ + # Tests for _should_create_dcra and _is_cnl_or_hlsm_changing helpers + # ------------------------------------------------------------------ + def test_is_cnl_or_hlsm_changing_true_for_cnl_flags(self): + """_is_cnl_or_hlsm_changing returns True when any CNL/HLSM flag is set.""" + for param_name in [ + "enable_container_network_logs", + "enable_retina_flow_logs", + "disable_container_network_logs", + "disable_retina_flow_logs", + ]: + dec = self._make_postprocessing_decorator({param_name: True}) + self.assertTrue(dec._is_cnl_or_hlsm_changing(), f"Expected True for {param_name}") + + def test_is_cnl_or_hlsm_changing_true_for_hlsm_flag(self): + dec = self._make_postprocessing_decorator({"enable_high_log_scale_mode": True}) + self.assertTrue(dec._is_cnl_or_hlsm_changing()) + + def test_is_cnl_or_hlsm_changing_false_when_no_flags(self): + dec = self._make_postprocessing_decorator({}) + self.assertFalse(dec._is_cnl_or_hlsm_changing()) + + def test_should_create_dcra_true_for_enable_addons(self): + dec = self._make_postprocessing_decorator({"enable_addons": "monitoring"}) + self.assertTrue(dec._should_create_dcra()) + + def test_should_create_dcra_true_for_enable_azure_monitor_logs(self): + dec = self._make_postprocessing_decorator({"enable_azure_monitor_logs": True}) + self.assertTrue(dec._should_create_dcra()) + + def test_should_create_dcra_true_for_cnl_flag(self): + dec = self._make_postprocessing_decorator({"enable_container_network_logs": True}) + self.assertTrue(dec._should_create_dcra()) + + def test_should_create_dcra_false_when_no_relevant_flags(self): + dec = self._make_postprocessing_decorator({}) + self.assertFalse(dec._should_create_dcra()) + + # ------------------------------------------------------------------ + # Tests for _postprocess_monitoring_disable + # ------------------------------------------------------------------ + def test_postprocess_monitoring_disable_calls_ensure_container_insights(self): + """_postprocess_monitoring_disable calls ensure_container_insights with remove_monitoring=True.""" + dec = self._make_postprocessing_decorator({}) + # Mock client.get to return cluster with monitoring addon + cluster_with_monitoring = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True) + }, + ) + dec.client = Mock() + dec.client.get = Mock(return_value=cluster_with_monitoring) + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec._postprocess_monitoring_disable() + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["remove_monitoring"]) + def test_postprocess_monitoring_disable_no_addon_is_noop(self): + """_postprocess_monitoring_disable is a no-op when no monitoring addon on current cluster.""" + dec = self._make_postprocessing_decorator({}) + cluster_without_monitoring = self.models.ManagedCluster(location="test_location") + dec.client = Mock() + dec.client.get = Mock(return_value=cluster_without_monitoring) + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec._postprocess_monitoring_disable() + mock_ecifm.assert_not_called() + + def test_postprocess_monitoring_disable_swallows_type_error(self): + """_postprocess_monitoring_disable should not raise on TypeError from ensure_container_insights.""" + dec = self._make_postprocessing_decorator({}) + cluster_with_monitoring = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True) + }, + ) + dec.client = Mock() + dec.client.get = Mock(return_value=cluster_with_monitoring) + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", side_effect=TypeError("test") + ): + # Should not raise + dec._postprocess_monitoring_disable() + + # ------------------------------------------------------------------ + # Tests for put_mc conditional postprocessing + # ------------------------------------------------------------------ + def test_put_mc_with_postprocessing(self): + """put_mc waits for the operation and calls postprocessing when needed.""" + dec = self._make_postprocessing_decorator({"enable_addons": "monitoring"}) + mc = dec.context.mc + dec.client = Mock() + mock_poller = Mock() + dec.client.begin_create_or_update = Mock(return_value=mock_poller) + returned_cluster = self._make_cluster_with_monitoring() + with patch( + "azext_aks_preview.managed_cluster_decorator.LongRunningOperation", + return_value=Mock(return_value=returned_cluster), + ), patch.object(dec, "postprocessing_after_mc_created") as mock_post, \ + patch.object(dec, "immediate_processing_after_request"): + result = dec.put_mc(mc) + mock_post.assert_called_once_with(returned_cluster) + self.assertEqual(result, returned_cluster) + + def test_put_mc_without_postprocessing_uses_sdk_no_wait(self): + """put_mc uses sdk_no_wait when no postprocessing is required.""" + dec = self._make_postprocessing_decorator({}) + mc = dec.context.mc + dec.client = Mock() + expected_result = Mock() + with patch.object(dec, "check_is_postprocessing_required", return_value=False), \ + patch( + "azext_aks_preview.managed_cluster_decorator.sdk_no_wait", + return_value=expected_result, + ) as mock_sdk_no_wait: + result = dec.put_mc(mc) + mock_sdk_no_wait.assert_called_once() + self.assertEqual(result, expected_result) + + # ------------------------------------------------------------------ + # Test for postprocessing_after_mc_created with monitoring_being_enabled bypass + # ------------------------------------------------------------------ + def test_postprocessing_enable_hlsm_with_monitoring_being_enabled_simultaneously(self): + """When enable_high_log_scale_mode=True and monitoring is being enabled in same command, + the standalone HLSM validation should be skipped (monitoring_being_enabled=True path).""" + dec = self._make_postprocessing_decorator({ + "enable_addons": "monitoring", + "enable_high_log_scale_mode": True, + }) + cluster = self._make_cluster_with_monitoring() + external_functions = dec.context.external_functions + with patch.object( + external_functions, "ensure_container_insights_for_monitoring", return_value=None + ) as mock_ecifm: + dec.postprocessing_after_mc_created(cluster) + mock_ecifm.assert_called_once() + _, kwargs = mock_ecifm.call_args + self.assertTrue(kwargs["create_dcr"]) + self.assertTrue(kwargs["enable_high_log_scale_mode"]) def test_set_up_health_monitor_profile(self): # no flag - no change @@ -11660,6 +11868,124 @@ def test_disable_azure_monitor_logs_with_omsagent_lowercase(self): self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) self.assertIsNone(mc_1.addon_profiles["omsagent"].config) + def test_disable_azure_monitor_logs_disables_container_insights(self): + # Test that _disable_azure_monitor_logs disables both addon profile AND + # azureMonitorProfile.containerInsights (the new API surface) + dec_1 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + + mc_1 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": "/subscriptions/test/workspace", + "useAADAuth": "false" + } + ) + }, + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + container_insights=self.models.ManagedClusterAzureMonitorProfileContainerInsights( + enabled=True, + log_analytics_workspace_resource_id="/subscriptions/test/workspace" + ) + ), + ) + dec_1.context.attach_mc(mc_1) + + dec_1._disable_azure_monitor_logs(mc_1) + + # Verify addon profile is disabled + self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) + self.assertIsNone(mc_1.addon_profiles["omsagent"].config) + + # Verify container_insights is also disabled + self.assertFalse(mc_1.azure_monitor_profile.container_insights.enabled) + + def test_disable_azure_monitor_logs_disables_container_insights_camelcase(self): + # Same test but with omsAgent (camelCase) key + dec_1 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + + mc_1 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsAgent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": "/subscriptions/test/workspace", + "useAADAuth": "false" + } + ) + }, + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + container_insights=self.models.ManagedClusterAzureMonitorProfileContainerInsights( + enabled=True, + log_analytics_workspace_resource_id="/subscriptions/test/workspace" + ) + ), + ) + dec_1.context.attach_mc(mc_1) + + dec_1._disable_azure_monitor_logs(mc_1) + + # Verify addon profile is disabled + self.assertFalse(mc_1.addon_profiles["omsAgent"].enabled) + self.assertIsNone(mc_1.addon_profiles["omsAgent"].config) + + # Verify container_insights is also disabled + self.assertFalse(mc_1.azure_monitor_profile.container_insights.enabled) + + def test_disable_azure_monitor_logs_without_container_insights(self): + # Test that disable works when container_insights is not set (addon-only cluster) + dec_1 = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + + mc_1 = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + "omsagent": self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": "/subscriptions/test/workspace", + "useAADAuth": "false" + } + ) + }, + ) + dec_1.context.attach_mc(mc_1) + + dec_1._disable_azure_monitor_logs(mc_1) + + # Verify addon profile is disabled + self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) + self.assertIsNone(mc_1.addon_profiles["omsagent"].config) + + # container_insights was never set, should not error + self.assertIsNone(mc_1.azure_monitor_profile) + def test_get_enable_opentelemetry_logs_validation_with_omsagent_camelcase(self): # Test that OpenTelemetry logs validation recognizes omsAgent (camelCase) as enabled ctx_1 = AKSPreviewManagedClusterContext( @@ -15949,6 +16275,238 @@ def test_update_health_monitor_profile(self): ) self.assertEqual(dec_mc_3, ground_truth_mc_3) + # ------------------------------------------------------------------ + # Tests for _setup_azure_monitor_logs setting enableRetinaNetworkFlags + # ------------------------------------------------------------------ + def test_setup_azure_monitor_logs_sets_retina_flags_when_cnl_enabled(self): + """_setup_azure_monitor_logs sets enableRetinaNetworkFlags in config when CNL is being enabled.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "enable_container_network_logs": True, + "workspace_resource_id": "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/test", + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking(enabled=True), + ), + addon_profiles={}, + ) + dec.context.attach_mc(mc) + + with patch.object( + dec.context.external_functions, + "sanitize_loganalytics_ws_resource_id", + side_effect=lambda x: x, + ): + dec._setup_azure_monitor_logs(mc) + + addon_profile = mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) + self.assertIsNotNone(addon_profile) + self.assertEqual(addon_profile.config.get("enableRetinaNetworkFlags"), "True") + + def test_setup_azure_monitor_logs_no_retina_flags_without_cnl(self): + """_setup_azure_monitor_logs does NOT set enableRetinaNetworkFlags when CNL is not specified.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "workspace_resource_id": "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/test", + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={}, + ) + dec.context.attach_mc(mc) + + with patch.object( + dec.context.external_functions, + "sanitize_loganalytics_ws_resource_id", + side_effect=lambda x: x, + ): + dec._setup_azure_monitor_logs(mc) + + addon_profile = mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) + self.assertIsNotNone(addon_profile) + self.assertNotIn("enableRetinaNetworkFlags", addon_profile.config) + + # ------------------------------------------------------------------ + # Tests for _disable_azure_monitor_logs disabling containerInsights + # ------------------------------------------------------------------ + def test_disable_azure_monitor_logs_disables_container_insights_with_msi_auth(self): + """_disable_azure_monitor_logs sets containerInsights.enabled=False when MSI auth is enabled, + triggering DCR/DCRA cleanup path.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "true", + CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID: "/subscriptions/test/rg/ws", + }, + ), + }, + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + container_insights=self.models.ManagedClusterAzureMonitorProfileContainerInsights( + enabled=True, + ), + ), + ) + dec.context.attach_mc(mc) + dec.client = Mock() + dec.client.get = Mock(return_value=mc) + with patch.object(dec.context, "get_subscription_id", return_value="test-sub"), \ + patch.object(dec.context, "get_resource_group_name", return_value="test-rg"), \ + patch.object(dec.context, "get_name", return_value="test-cluster"), \ + patch.object( + dec.context.external_functions, "ensure_container_insights_for_monitoring", return_value=None, + ): + dec._disable_azure_monitor_logs(mc) + self.assertFalse(mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled) + self.assertFalse(mc.azure_monitor_profile.container_insights.enabled) + + def test_disable_azure_monitor_logs_no_container_insights_skips(self): + """_disable_azure_monitor_logs works fine when azureMonitorProfile has no containerInsights.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + CONST_MONITORING_USING_AAD_MSI_AUTH: "false", + }, + ), + }, + ) + dec.context.attach_mc(mc) + dec.client = Mock() + dec.client.get = Mock(return_value=mc) + dec._disable_azure_monitor_logs(mc) + self.assertFalse(mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled) + + # ------------------------------------------------------------------ + # Tests for update_monitoring_profile_flow_logs: monitoring_being_enabled bypass + # ------------------------------------------------------------------ + def test_update_hlsm_standalone_skips_validation_when_monitoring_being_enabled(self): + """When enable_high_log_scale_mode=True and monitoring is being enabled simultaneously, + the validation of existing addon state is skipped.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + "enable_azure_monitor_logs": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + # No monitoring addon present yet — would raise RequiredArgumentMissingError + # if validation wasn't bypassed. + mc = self.models.ManagedCluster(location="test_location") + dec.context.attach_mc(mc) + # Should NOT raise because monitoring is being enabled in the same command + dec.update_monitoring_profile_flow_logs(mc) + self.assertTrue( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + def test_update_hlsm_standalone_skips_validation_when_enable_addons_monitoring(self): + """When enable_high_log_scale_mode=True and --enable-addons monitoring in same command, + the validation of existing addon state is skipped.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_high_log_scale_mode": True, + "enable_addons": "monitoring", + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster(location="test_location") + dec.context.attach_mc(mc) + dec.update_monitoring_profile_flow_logs(mc) + self.assertTrue( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + def test_update_enable_cnl_sets_postprocessing_flag(self): + """Enabling CNL via enable_container_network_logs sets the postprocessing flag.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_container_network_logs": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking(enabled=True), + ), + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec.context.attach_mc(mc) + dec.update_monitoring_profile_flow_logs(mc) + self.assertTrue( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + def test_update_enable_retina_flow_logs_sets_postprocessing_flag(self): + """Enabling CNL via legacy enable_retina_flow_logs sets the postprocessing flag.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + {"enable_retina_flow_logs": True}, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + network_profile=self.models.ContainerServiceNetworkProfile( + advanced_networking=self.models.AdvancedNetworking(enabled=True), + ), + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={CONST_MONITORING_USING_AAD_MSI_AUTH: "true"}, + ) + }, + ) + dec.context.attach_mc(mc) + dec.update_monitoring_profile_flow_logs(mc) + self.assertTrue( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + if __name__ == "__main__": unittest.main() From 3c5402dd7865511724c4f6dc3a2ebbef547b91c4 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 23 Mar 2026 15:47:16 +0000 Subject: [PATCH 19/32] Fix tests --- .../azext_aks_preview/tests/latest/test_aks_commands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index 32dd6f6a54f..ac4828eeaef 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19761,6 +19761,7 @@ def test_aks_update_standalone_enable_high_log_scale_mode( checks=[self.is_empty()], ) + @live_only() @AllowLargeResponse() @AKSCustomResourceGroupPreparer( random_name_length=17, From 097cb7c50396d85cd2a4c9c7a4bd948410a2be96 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 23 Mar 2026 16:55:40 +0000 Subject: [PATCH 20/32] Fix live test --- .../azext_aks_preview/tests/latest/test_aks_commands.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index ac4828eeaef..83678b66664 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19661,12 +19661,14 @@ def test_aks_update_disable_azuremonitorlogs( ]) # Disable Azure Monitor Logs + # Note: provisioningState may be "Updating" here due to a race between + # DCRA deletion (fire-and-forget LRO) and the subsequent PUT request. + # The aks show below verifies the final Succeeded state. disable_cmd = ( "aks update --resource-group={resource_group} --name={name} --yes " "--disable-azure-monitor-logs --output=json" ) self.cmd(disable_cmd, checks=[ - self.check("provisioningState", "Succeeded"), self.check("addonProfiles.omsagent.enabled", False), ]) From ef4202bba714e749f9a3f35ad686c40aa22ea919 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 10:01:32 +0000 Subject: [PATCH 21/32] Fix live test --- .../azext_aks_preview/tests/latest/test_aks_commands.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index 83678b66664..bc1c8d15ee6 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19748,6 +19748,10 @@ def test_aks_update_standalone_enable_high_log_scale_mode( self.check("contains(properties.dataFlows[0].streams, 'Microsoft-ContainerLogV2-HighScale')", True), ]) + # Wait for any in-progress addon operations to complete before next update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + # Now disable high log scale mode disable_cmd = ( "aks update --resource-group={resource_group} --name={name} --yes " From ceb6d086dbd05fafd1400705923dee46e76118c9 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 24 Mar 2026 10:54:44 +0000 Subject: [PATCH 22/32] Fix tests --- .../tests/latest/test_aks_commands.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index bc1c8d15ee6..a03efba9c0d 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19433,6 +19433,10 @@ def test_aks_update_enable_azuremonitorlogs_with_hlsm( self.check("provisioningState", "Succeeded"), ]) + # Wait for any in-progress addon operations to complete before next update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + # Update: enable monitoring with high log scale mode update_cmd = ( "aks update --resource-group={resource_group} --name={name} --yes " @@ -19529,6 +19533,10 @@ def test_aks_create_with_retina_flow_logs_alias( self.check('properties.dataFlows[0].streams[-1]', 'Microsoft-ContainerNetworkLogs'), ]) + # Wait for any in-progress addon operations to complete before next update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + # Disable via the alias — run twice like test_aks_create_acns_with_flow_logs # (first run applies the change, second run verifies the result) disable_cmd = "aks update --resource-group={resource_group} --name={name} --disable-retina-flow-logs -o json" @@ -19586,6 +19594,10 @@ def test_aks_update_enable_cnl_via_azuremonitorlogs( self.check("networkProfile.advancedNetworking.enabled", True), ]) + # Wait for any in-progress addon operations to complete before next update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + # Update: enable monitoring + CNL together update_cmd = ( "aks update --resource-group={resource_group} --name={name} --yes " @@ -19593,14 +19605,18 @@ def test_aks_update_enable_cnl_via_azuremonitorlogs( "--aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/AdvancedNetworkingFlowLogsPreview " "--output=json" ) - self.cmd(update_cmd, checks=[ + self.cmd(update_cmd) + + # Wait for the update to fully complete, then verify via aks show + self.cmd(wait_cmd, checks=[self.is_empty()]) + show_cmd = "aks show --resource-group={resource_group} --name={name} --output=json" + self.cmd(show_cmd, checks=[ self.check("provisioningState", "Succeeded"), self.check("addonProfiles.omsagent.enabled", True), self.check("addonProfiles.omsagent.config.enableRetinaNetworkFlags", "True"), ]) # Verify DCR was created with ContainerNetworkLogs stream - show_cmd = "aks show --resource-group={resource_group} --name={name} --output=json" response = self.cmd(show_cmd).get_output_in_json() cluster_resource_id = response["id"] From ec2d69ed02d773057c371d673ca659f069355e4e Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Fri, 27 Mar 2026 10:15:13 +0000 Subject: [PATCH 23/32] PR comments --- src/aks-preview/azext_aks_preview/_helpers.py | 26 ++- src/aks-preview/azext_aks_preview/custom.py | 28 ++- .../managed_cluster_decorator.py | 171 ++++++----------- .../tests/latest/test_helpers.py | 49 +++++ .../latest/test_managed_cluster_decorator.py | 175 +++++++++++++----- 5 files changed, 277 insertions(+), 172 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/_helpers.py b/src/aks-preview/azext_aks_preview/_helpers.py index 1f035643969..e1c31d582e9 100644 --- a/src/aks-preview/azext_aks_preview/_helpers.py +++ b/src/aks-preview/azext_aks_preview/_helpers.py @@ -389,6 +389,27 @@ def check_is_azure_cli_core_editable_installed(): return False +def get_monitoring_addon_key(addon_profiles, monitoring_addon_name): + """Return the canonical key for the monitoring addon, normalizing non-standard casing. + + The API response may return the monitoring addon key in any casing (e.g. + "omsagent", "omsAgent", "oMSaGent"). This helper performs a + case-insensitive lookup and, when a non-standard key is found, re-keys + addon_profiles in-place so that subsequent code always uses the canonical + monitoring_addon_name (lowercase) form. + """ + if addon_profiles is None: + return monitoring_addon_name + if monitoring_addon_name in addon_profiles: + return monitoring_addon_name + target_lower = monitoring_addon_name.lower() + for key in list(addon_profiles): + if key.lower() == target_lower: + addon_profiles[monitoring_addon_name] = addon_profiles.pop(key) + return monitoring_addon_name + return monitoring_addon_name + + def check_is_monitoring_addon_enabled(addons, instance): is_monitoring_addon_enabled = False is_monitoring_addon = False @@ -401,10 +422,11 @@ def check_is_monitoring_addon_enabled(addons, instance): is_monitoring_addon = True break addon_profiles = instance.addon_profiles or {} + monitoring_addon_key = get_monitoring_addon_key(addon_profiles, CONST_MONITORING_ADDON_NAME) is_monitoring_addon_enabled = ( is_monitoring_addon - and CONST_MONITORING_ADDON_NAME in addon_profiles - and addon_profiles[CONST_MONITORING_ADDON_NAME].enabled + and monitoring_addon_key in addon_profiles + and addon_profiles[monitoring_addon_key].enabled ) except Exception as ex: # pylint: disable=broad-except logger.debug("failed to check monitoring addon enabled: %s", ex) diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index b2aef38e18e..891525c61e5 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -66,6 +66,7 @@ check_is_private_link_cluster, get_cluster_snapshot_by_snapshot_id, get_k8s_extension_module, + get_monitoring_addon_key, get_nodepool_snapshot_by_snapshot_id, print_or_merge_credentials, process_message_for_run_command, @@ -3048,14 +3049,16 @@ def aks_disable_addons(cmd, client, resource_group_name, name, addons, no_wait=F subscription_id = get_subscription_id(cmd.cli_ctx) try: + addon_profiles = instance.addon_profiles or {} + monitoring_addon_key = get_monitoring_addon_key(addon_profiles, CONST_MONITORING_ADDON_NAME) if ( addons == "monitoring" and - CONST_MONITORING_ADDON_NAME in instance.addon_profiles and - instance.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled and + monitoring_addon_key in addon_profiles and + addon_profiles[monitoring_addon_key].enabled and CONST_MONITORING_USING_AAD_MSI_AUTH in - instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config and + addon_profiles[monitoring_addon_key].config and str( - instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config[ + addon_profiles[monitoring_addon_key].config[ CONST_MONITORING_USING_AAD_MSI_AUTH ] ).lower() == "true" @@ -3063,7 +3066,7 @@ def aks_disable_addons(cmd, client, resource_group_name, name, addons, no_wait=F # remove the DCR association because otherwise the DCR can't be deleted ensure_container_insights_for_monitoring( cmd, - instance.addon_profiles[CONST_MONITORING_ADDON_NAME], + addon_profiles[monitoring_addon_key], subscription_id, resource_group_name, name, @@ -3166,11 +3169,13 @@ def aks_enable_addons( if ( is_monitoring_addon_enabled ): + addon_profiles = instance.addon_profiles or {} + monitoring_addon_key = get_monitoring_addon_key(addon_profiles, CONST_MONITORING_ADDON_NAME) if ( CONST_MONITORING_USING_AAD_MSI_AUTH in - instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config and + addon_profiles[monitoring_addon_key].config and str( - instance.addon_profiles[CONST_MONITORING_ADDON_NAME].config[ + addon_profiles[monitoring_addon_key].config[ CONST_MONITORING_USING_AAD_MSI_AUTH ] ).lower() == "true" @@ -3178,10 +3183,15 @@ def aks_enable_addons( if not msi_auth: raise ArgumentUsageError( "--enable-msi-auth-for-monitoring can not be used on clusters with service principal auth.") + # Auto-enable HLSM when CNL is active and HLSM wasn't explicitly set + if enable_high_log_scale_mode is None and \ + (addon_profiles[monitoring_addon_key].config or {}).get( + "enableRetinaNetworkFlags", "").lower() == "true": + enable_high_log_scale_mode = True # create a Data Collection Rule (DCR) and associate it with the cluster ensure_container_insights_for_monitoring( cmd, - instance.addon_profiles[CONST_MONITORING_ADDON_NAME], + addon_profiles[monitoring_addon_key], subscription_id, resource_group_name, name, @@ -3209,7 +3219,7 @@ def aks_enable_addons( raise ArgumentUsageError("--ampls-resource-id can not be used without MSI auth.") ensure_container_insights_for_monitoring( cmd, - instance.addon_profiles[CONST_MONITORING_ADDON_NAME], + addon_profiles[monitoring_addon_key], subscription_id, resource_group_name, name, diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 063a0ec2fcc..c6e9924d47b 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -7,8 +7,6 @@ import copy import datetime import os -import random -import time from types import SimpleNamespace from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union @@ -68,6 +66,7 @@ check_is_azure_cli_core_editable_installed, check_is_private_cluster, get_cluster_snapshot_by_snapshot_id, + get_monitoring_addon_key, filter_hard_taints, ) from azext_aks_preview._loadbalancer import create_load_balancer_profile @@ -102,7 +101,6 @@ from azext_aks_preview.custom import ( ensure_container_insights_for_monitoring_preview, ) -from azure.core.exceptions import HttpResponseError, ResourceExistsError from azure.cli.command_modules.acs._client_factory import get_graph_client from azure.cli.command_modules.acs._consts import ( CONST_OUTBOUND_TYPE_LOAD_BALANCER, @@ -175,19 +173,12 @@ ResourceReference = TypeVar("ResourceReference") -def _get_monitoring_addon_key(cluster, addon_consts): - """Resolve the monitoring addon key from the cluster's addon_profiles. - - The API response may return the addon key as either "omsagent" or "omsAgent". - Returns the key present in addon_profiles, or the default constant if neither is found. - """ - const_monitoring = addon_consts.get("CONST_MONITORING_ADDON_NAME") - if cluster.addon_profiles: - if const_monitoring in cluster.addon_profiles: - return const_monitoring - if CONST_MONITORING_ADDON_NAME_CAMELCASE in cluster.addon_profiles: - return CONST_MONITORING_ADDON_NAME_CAMELCASE - return const_monitoring +def _get_monitoring_addon_key_from_consts(addon_profiles, addon_consts): + """Thin wrapper around get_monitoring_addon_key that unpacks addon_consts dict.""" + return get_monitoring_addon_key( + addon_profiles, + addon_consts.get("CONST_MONITORING_ADDON_NAME"), + ) # pylint: disable=too-few-public-methods @@ -450,13 +441,11 @@ def get_enable_msi_auth_for_monitoring(self) -> Union[bool, None]: # but MSI-based clusters set client_id to "msi". Check if the monitoring addon # already has useAADAuth=true, which indicates MSI auth is actually in use. addon_consts = self.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") if self.mc and self.mc.addon_profiles: - monitoring_profile = ( - self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or - self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - ) + monitoring_addon_key = _get_monitoring_addon_key_from_consts( + self.mc.addon_profiles, addon_consts) + monitoring_profile = self.mc.addon_profiles.get(monitoring_addon_key) result = bool( monitoring_profile and monitoring_profile.config and str(monitoring_profile.config.get( @@ -1073,15 +1062,13 @@ def get_container_network_logs(self, mc: ManagedCluster) -> Union[bool, None]: "monitoring" in enable_addons or bool(self.raw_param.get("enable_azure_monitor_logs")) ) - monitoring_already_enabled = ( - mc.addon_profiles and - ( - (mc.addon_profiles.get("omsagent") and - mc.addon_profiles["omsagent"].enabled) or - (mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) and - mc.addon_profiles[CONST_MONITORING_ADDON_NAME_CAMELCASE].enabled) + monitoring_already_enabled = False + if mc.addon_profiles: + addon_consts = self.get_addon_consts() + mk = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) + monitoring_already_enabled = bool( + mc.addon_profiles.get(mk) and mc.addon_profiles[mk].enabled ) - ) monitoring_enabled = monitoring_being_enabled or monitoring_already_enabled if not acns_enabled or not monitoring_enabled: raise InvalidArgumentValueError( @@ -2879,15 +2866,13 @@ def _get_enable_opentelemetry_logs(self, enable_validation: bool = False) -> boo self.mc.azure_monitor_profile.container_insights.enabled ) - # Check legacy addon location (try both lowercase and camelCase) + # Check legacy addon location monitoring_addon_enabled = False if self.mc.addon_profiles: - # Try lowercase first (constant value) - if CONST_MONITORING_ADDON_NAME in self.mc.addon_profiles: - monitoring_addon_enabled = self.mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled - # Try camelCase variant (what Azure actually returns) - elif CONST_MONITORING_ADDON_NAME_CAMELCASE in self.mc.addon_profiles: - monitoring_addon_enabled = self.mc.addon_profiles[CONST_MONITORING_ADDON_NAME_CAMELCASE].enabled + addon_consts = self.get_addon_consts() + mk = _get_monitoring_addon_key_from_consts(self.mc.addon_profiles, addon_consts) + if mk in self.mc.addon_profiles: + monitoring_addon_enabled = self.mc.addon_profiles[mk].enabled monitoring_addon_currently_enabled = container_insights_enabled or monitoring_addon_enabled @@ -3036,10 +3021,9 @@ def get_enable_high_log_scale_mode(self) -> Union[bool, None]: # Check if monitoring addon is already enabled in the cluster monitoring_addon_enabled = False if self.mc and self.mc.addon_profiles: - if CONST_MONITORING_ADDON_NAME in self.mc.addon_profiles: - monitoring_addon_enabled = self.mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled - elif CONST_MONITORING_ADDON_NAME_CAMELCASE in self.mc.addon_profiles: - monitoring_addon_enabled = self.mc.addon_profiles[CONST_MONITORING_ADDON_NAME_CAMELCASE].enabled + mk = _get_monitoring_addon_key_from_consts(self.mc.addon_profiles, addon_consts) + if mk in self.mc.addon_profiles: + monitoring_addon_enabled = self.mc.addon_profiles[mk].enabled if not monitoring_being_enabled and not enable_azure_monitor_logs and not monitoring_addon_enabled: raise RequiredArgumentMissingError( @@ -3055,11 +3039,8 @@ def get_enable_high_log_scale_mode(self) -> Union[bool, None]: cnl_already_enabled = False if self.mc and self.mc.addon_profiles: addon_consts = self.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - monitoring_profile = ( - self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or - self.mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - ) + mk = _get_monitoring_addon_key_from_consts(self.mc.addon_profiles, addon_consts) + monitoring_profile = self.mc.addon_profiles.get(mk) if monitoring_profile and monitoring_profile.config: cnl_already_enabled = str( monitoring_profile.config.get("enableRetinaNetworkFlags", "") @@ -3072,9 +3053,6 @@ def get_enable_high_log_scale_mode(self) -> Union[bool, None]: ) # If container network logs are not being enabled, return the original value - # Return False if not explicitly set to maintain backward compatibility with base class - if enable_high_log_scale_mode is None: - return False return enable_high_log_scale_mode def _get_enable_vpa(self, enable_validation: bool = False) -> bool: @@ -4673,26 +4651,11 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if not workspace_resource_id: ensure_workspace_func = ( self.context.external_functions.ensure_default_log_analytics_workspace_for_monitoring) - # Retry with exponential backoff + jitter to handle 409 Conflict when - # workspace is still provisioning (common when parallel commands target - # the same default workspace). Delays: ~5 s, ~10 s, ~20 s (worst-case - # total ~52 s; fast path ~7 s if the first retry succeeds). - max_attempts = 4 - for attempt in range(max_attempts): - try: - workspace_resource_id = ensure_workspace_func( - self.cmd, - self.context.get_subscription_id(), - self.context.get_resource_group_name() - ) - break - except (HttpResponseError, ResourceExistsError): - if attempt < max_attempts - 1: - base_delay = 5 * (2 ** attempt) # 5 s, 10 s, 20 s - jitter = random.uniform(0, base_delay * 0.5) - time.sleep(base_delay + jitter) - else: - raise + workspace_resource_id = ensure_workspace_func( + self.cmd, + self.context.get_subscription_id(), + self.context.get_resource_group_name() + ) # Sanitize and configure sanitize_func = self.context.external_functions.sanitize_loganalytics_ws_resource_id @@ -5575,7 +5538,8 @@ def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: ) elif self._should_create_dcra(): addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key(cluster, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts( + cluster.addon_profiles, addon_consts) if cluster.addon_profiles else addon_consts.get("CONST_MONITORING_ADDON_NAME") self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, cluster.addon_profiles[monitoring_addon_key], @@ -5899,7 +5863,7 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus if container_network_logs_enabled is not None: if mc.addon_profiles: addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key(mc, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) monitoring_addon_profile = mc.addon_profiles.get(monitoring_addon_key) if monitoring_addon_profile: config = monitoring_addon_profile.config or {} @@ -5927,16 +5891,13 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus if not monitoring_being_enabled: # Only validate existing addon state when not enabling monitoring simultaneously addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - # Resolve the addon profile, handling both "omsagent" and "omsAgent" key variants. + # Resolve the addon profile, normalizing non-standard key casing. monitoring_addon_profile = None if mc.addon_profiles: - monitoring_addon_profile = ( - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - ) + mk = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) + monitoring_addon_profile = mc.addon_profiles.get(mk) if not monitoring_addon_profile or not monitoring_addon_profile.enabled: raise RequiredArgumentMissingError( @@ -5963,11 +5924,8 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus cnl_already_enabled = False if mc.addon_profiles: addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - monitoring_profile = ( - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME) or - mc.addon_profiles.get(CONST_MONITORING_ADDON_NAME_CAMELCASE) - ) + mk = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) + monitoring_profile = mc.addon_profiles.get(mk) if monitoring_profile and monitoring_profile.config: cnl_already_enabled = str( monitoring_profile.config.get("enableRetinaNetworkFlags", "") @@ -7861,26 +7819,11 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if not workspace_resource_id: ensure_workspace_func = ( self.context.external_functions.ensure_default_log_analytics_workspace_for_monitoring) - # Retry with exponential backoff + jitter to handle 409 Conflict when - # workspace is still provisioning (common when parallel commands target - # the same default workspace). Delays: ~5 s, ~10 s, ~20 s (worst-case - # total ~52 s; fast path ~7 s if the first retry succeeds). - max_attempts = 4 - for attempt in range(max_attempts): - try: - workspace_resource_id = ensure_workspace_func( - self.cmd, - self.context.get_subscription_id(), - self.context.get_resource_group_name() - ) - break - except (HttpResponseError, ResourceExistsError): - if attempt < max_attempts - 1: - base_delay = 5 * (2 ** attempt) # 5 s, 10 s, 20 s - jitter = random.uniform(0, base_delay * 0.5) - time.sleep(base_delay + jitter) - else: - raise + workspace_resource_id = ensure_workspace_func( + self.cmd, + self.context.get_subscription_id(), + self.context.get_resource_group_name() + ) # Sanitize and configure sanitize_func = self.context.external_functions.sanitize_loganalytics_ws_resource_id @@ -7931,16 +7874,14 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: def _disable_azure_monitor_logs(self, mc: ManagedCluster) -> None: """Disable Azure Monitor logs configuration.""" addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - # Check if the addon profile exists (check both lowercase and camelCase) + # Normalize the addon key (handles any casing variant) addon_key = None if mc.addon_profiles: - if CONST_MONITORING_ADDON_NAME in mc.addon_profiles: - addon_key = CONST_MONITORING_ADDON_NAME - elif CONST_MONITORING_ADDON_NAME_CAMELCASE in mc.addon_profiles: - addon_key = CONST_MONITORING_ADDON_NAME_CAMELCASE + addon_key = _get_monitoring_addon_key_from_consts(mc.addon_profiles, addon_consts) + if addon_key not in mc.addon_profiles: + addon_key = None # If the addon profile doesn't exist at all, there's nothing to disable if not addon_key: @@ -7976,15 +7917,12 @@ def _disable_azure_monitor_logs(self, mc: ManagedCluster) -> None: # Fetch the current cluster state from Azure (same as aks_disable_addons line 2791) current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) - # Find the addon key in current_cluster (it may have different casing) - current_addon_key = None - if current_cluster.addon_profiles: - if CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles: - current_addon_key = CONST_MONITORING_ADDON_NAME - elif CONST_MONITORING_ADDON_NAME_CAMELCASE in current_cluster.addon_profiles: - current_addon_key = CONST_MONITORING_ADDON_NAME_CAMELCASE + # Find the addon key in current_cluster (normalize casing) + current_addon_key = _get_monitoring_addon_key_from_consts( + current_cluster.addon_profiles, addon_consts) if current_cluster.addon_profiles else None + has_addon = current_addon_key and current_addon_key in (current_cluster.addon_profiles or {}) - if current_addon_key: + if has_addon: try: # Use the current cluster's addon profile for cleanup (not the modified mc object) self.context.external_functions.ensure_container_insights_for_monitoring( @@ -8230,7 +8168,8 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: addon_consts = self.context.get_addon_consts() CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - monitoring_addon_key = _get_monitoring_addon_key(cluster, addon_consts) + monitoring_addon_key = _get_monitoring_addon_key_from_consts( + cluster.addon_profiles, addon_consts) if cluster.addon_profiles else addon_consts.get("CONST_MONITORING_ADDON_NAME") if (cluster.addon_profiles and monitoring_addon_key in cluster.addon_profiles and diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_helpers.py b/src/aks-preview/azext_aks_preview/tests/latest/test_helpers.py index 19b3146fae8..bfa1f5fc7e5 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_helpers.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_helpers.py @@ -11,6 +11,7 @@ check_is_private_link_cluster, get_cluster_snapshot, get_cluster_snapshot_by_snapshot_id, + get_monitoring_addon_key, get_nodepool_snapshot, get_nodepool_snapshot_by_snapshot_id, process_message_for_run_command, @@ -211,5 +212,53 @@ def test_filter_hard_taints_mixed_effects(self): expected_filtered_taints = ["key3=value3:PreferNoSchedule", "key5:PreferNoSchedule"] self.assertEqual(filter_hard_taints(input_taints), expected_filtered_taints) + +class TestGetMonitoringAddonKey(unittest.TestCase): + """Tests for the get_monitoring_addon_key helper.""" + + def test_returns_canonical_when_present(self): + addon_profiles = {"omsagent": Mock(enabled=True)} + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + # dict should be unchanged + self.assertIn("omsagent", addon_profiles) + + def test_normalizes_camelcase_key(self): + addon_profiles = {"omsAgent": Mock(enabled=True)} + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + # dict should have been re-keyed + self.assertIn("omsagent", addon_profiles) + self.assertNotIn("omsAgent", addon_profiles) + + def test_normalizes_arbitrary_casing(self): + addon_profiles = {"OMSagent": Mock(enabled=True)} + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + self.assertIn("omsagent", addon_profiles) + self.assertNotIn("OMSagent", addon_profiles) + + def test_returns_canonical_when_none_profiles(self): + result = get_monitoring_addon_key(None, "omsagent") + self.assertEqual(result, "omsagent") + + def test_returns_canonical_when_key_not_present(self): + addon_profiles = {"some_other_addon": Mock(enabled=True)} + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + + def test_prefers_exact_match_over_case_insensitive(self): + # If both canonical and variant exist, canonical wins (no re-keying) + addon_profiles = { + "omsagent": Mock(enabled=True), + "omsAgent": Mock(enabled=False), + } + result = get_monitoring_addon_key(addon_profiles, "omsagent") + self.assertEqual(result, "omsagent") + # Both keys should still be present (no re-keying needed) + self.assertIn("omsagent", addon_profiles) + self.assertIn("omsAgent", addon_profiles) + + if __name__ == "__main__": unittest.main() diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index c5c8c5d8ecb..bb30e26666a 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -73,7 +73,7 @@ AKSPreviewManagedClusterCreateDecorator, AKSPreviewManagedClusterModels, AKSPreviewManagedClusterUpdateDecorator, - _get_monitoring_addon_key, + _get_monitoring_addon_key_from_consts, ) from azext_aks_preview.tests.latest.utils import get_test_data_file_path from azure.cli.command_modules.acs._consts import ( @@ -136,7 +136,7 @@ def test_models(self): class AKSPreviewGetMonitoringAddonKeyTestCase(unittest.TestCase): - """Tests for the standalone _get_monitoring_addon_key helper function.""" + """Tests for the _get_monitoring_addon_key_from_consts helper function.""" def setUp(self): register_aks_preview_resource_type() @@ -148,49 +148,40 @@ def setUp(self): } def test_returns_lowercase_key_when_present(self): - cluster = self.models.ManagedCluster( - location="test_location", - addon_profiles={ - CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True), - }, - ) - result = _get_monitoring_addon_key(cluster, self.addon_consts) + addon_profiles = { + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True), + } + result = _get_monitoring_addon_key_from_consts(addon_profiles, self.addon_consts) self.assertEqual(result, CONST_MONITORING_ADDON_NAME) - def test_returns_camelcase_key_when_present(self): - cluster = self.models.ManagedCluster( - location="test_location", - addon_profiles={ - CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True), - }, - ) - result = _get_monitoring_addon_key(cluster, self.addon_consts) - self.assertEqual(result, CONST_MONITORING_ADDON_NAME_CAMELCASE) + def test_normalizes_camelcase_key(self): + addon_profiles = { + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True), + } + result = _get_monitoring_addon_key_from_consts(addon_profiles, self.addon_consts) + # After normalization, the canonical key should always be returned + self.assertEqual(result, CONST_MONITORING_ADDON_NAME) + # Dict should have been re-keyed in place + self.assertIn(CONST_MONITORING_ADDON_NAME, addon_profiles) + self.assertNotIn(CONST_MONITORING_ADDON_NAME_CAMELCASE, addon_profiles) def test_prefers_lowercase_when_both_present(self): - cluster = self.models.ManagedCluster( - location="test_location", - addon_profiles={ - CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True), - CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True), - }, - ) - result = _get_monitoring_addon_key(cluster, self.addon_consts) + addon_profiles = { + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile(enabled=True), + CONST_MONITORING_ADDON_NAME_CAMELCASE: self.models.ManagedClusterAddonProfile(enabled=True), + } + result = _get_monitoring_addon_key_from_consts(addon_profiles, self.addon_consts) self.assertEqual(result, CONST_MONITORING_ADDON_NAME) def test_returns_default_when_no_addon_profiles(self): - cluster = self.models.ManagedCluster(location="test_location") - result = _get_monitoring_addon_key(cluster, self.addon_consts) + result = _get_monitoring_addon_key_from_consts(None, self.addon_consts) self.assertEqual(result, CONST_MONITORING_ADDON_NAME) def test_returns_default_when_neither_key_present(self): - cluster = self.models.ManagedCluster( - location="test_location", - addon_profiles={ - "some_other_addon": self.models.ManagedClusterAddonProfile(enabled=True), - }, - ) - result = _get_monitoring_addon_key(cluster, self.addon_consts) + addon_profiles = { + "some_other_addon": self.models.ManagedClusterAddonProfile(enabled=True), + } + result = _get_monitoring_addon_key_from_consts(addon_profiles, self.addon_consts) self.assertEqual(result, CONST_MONITORING_ADDON_NAME) @@ -4897,7 +4888,7 @@ def test_get_enable_high_log_scale_mode_default(self): """Test default behavior when no container network logs or high log scale mode is specified. When enable_high_log_scale_mode is not explicitly set and container network logs are not enabled, - the method should return False (not None) to maintain backward compatibility with the base class. + the method should return None to align with the base class behavior. """ ctx = AKSPreviewManagedClusterContext( self.cmd, @@ -4906,7 +4897,7 @@ def test_get_enable_high_log_scale_mode_default(self): decorator_mode=DecoratorMode.CREATE, ) result = ctx.get_enable_high_log_scale_mode() - self.assertFalse(result) + self.assertIsNone(result) def test_get_enable_high_log_scale_mode_explicit_false_without_cnl(self): """Test when user explicitly sets enable_high_log_scale_mode to False without container network logs. @@ -11753,6 +11744,7 @@ def test_setup_azure_monitor_logs_with_omsagent_camelcase(self): # Verify: The parent class normalizes addon keys to lowercase in-place, # so "omsAgent" becomes "omsagent". The key point is no duplicate is created. self.assertIn("omsagent", mc_1.addon_profiles) + self.assertNotIn("omsAgent", mc_1.addon_profiles) self.assertEqual(len([k for k in mc_1.addon_profiles if k.lower() == "omsagent"]), 1) # No duplicate self.assertTrue(mc_1.addon_profiles["omsagent"].enabled) self.assertEqual( @@ -11827,10 +11819,10 @@ def test_disable_azure_monitor_logs_with_omsagent_camelcase(self): # Call _disable_azure_monitor_logs dec_1._disable_azure_monitor_logs(mc_1) - # Verify: omsAgent should be disabled - self.assertIn("omsAgent", mc_1.addon_profiles) - self.assertFalse(mc_1.addon_profiles["omsAgent"].enabled) - self.assertIsNone(mc_1.addon_profiles["omsAgent"].config) + # After normalization, the camelCase key is re-keyed to canonical lowercase + self.assertIn("omsagent", mc_1.addon_profiles) + self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) + self.assertIsNone(mc_1.addon_profiles["omsagent"].config) def test_disable_azure_monitor_logs_with_omsagent_lowercase(self): # Test that _disable_azure_monitor_logs handles omsagent (lowercase) correctly @@ -11944,9 +11936,9 @@ def test_disable_azure_monitor_logs_disables_container_insights_camelcase(self): dec_1._disable_azure_monitor_logs(mc_1) - # Verify addon profile is disabled - self.assertFalse(mc_1.addon_profiles["omsAgent"].enabled) - self.assertIsNone(mc_1.addon_profiles["omsAgent"].config) + # After normalization, the camelCase key is re-keyed to the canonical lowercase form + self.assertFalse(mc_1.addon_profiles["omsagent"].enabled) + self.assertIsNone(mc_1.addon_profiles["omsagent"].config) # Verify container_insights is also disabled self.assertFalse(mc_1.azure_monitor_profile.container_insights.enabled) @@ -14357,7 +14349,7 @@ def test_enable_container_network_logs(self): ), ), addon_profiles={ - "omsAgent": self.models.ManagedClusterAddonProfile( + "omsagent": self.models.ManagedClusterAddonProfile( enabled=True, config={"enableRetinaNetworkFlags": "False"} ) @@ -16507,6 +16499,99 @@ def test_update_enable_retina_flow_logs_sets_postprocessing_flag(self): dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) ) + def test_disable_monitoring_clears_cnl_and_hlsm_config(self): + """Disabling monitoring addon should wipe config including enableRetinaNetworkFlags (CNL) + and any HLSM-related settings, so that re-enabling does not carry them forward.""" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": "/subscriptions/test/workspace", + CONST_MONITORING_USING_AAD_MSI_AUTH: "false", + "enableRetinaNetworkFlags": "true", + }, + ), + }, + ) + dec.context.attach_mc(mc) + dec.client = Mock() + dec.client.get = Mock(return_value=mc) + + dec._disable_azure_monitor_logs(mc) + + # Config should be completely wiped — no CNL or HLSM values survive + self.assertFalse(mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled) + self.assertIsNone(mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config) + + def test_reenable_monitoring_after_disable_does_not_carry_cnl(self): + """Re-enabling monitoring after disable should produce a fresh config + without enableRetinaNetworkFlags (CNL) or HLSM-related keys.""" + # Step 1: start with monitoring enabled + CNL (useAADAuth=false to skip DCR cleanup) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": "/subscriptions/test/workspace", + CONST_MONITORING_USING_AAD_MSI_AUTH: "false", + "enableRetinaNetworkFlags": "true", + }, + ), + }, + ) + + # Step 2: disable monitoring + dec_disable = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_azure_monitor_logs": True, + "yes": True, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + dec_disable.context.attach_mc(mc) + dec_disable.client = Mock() + dec_disable.client.get = Mock(return_value=mc) + dec_disable._disable_azure_monitor_logs(mc) + self.assertIsNone(mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config) + + # Step 3: re-enable monitoring (no CNL flag passed) + # Simulate server round-trip: after PUT with config=None, the server + # returns the addon with config as empty dict, not None. + mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config = {} + dec_enable = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "workspace_resource_id": "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/new-workspace", + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + dec_enable.context.attach_mc(mc) + dec_enable.context.set_intermediate("subscription_id", "test-subscription-id") + dec_enable._setup_azure_monitor_logs(mc) + + # Config should only have workspace + MSI auth — no CNL or HLSM keys + addon_config = mc.addon_profiles[CONST_MONITORING_ADDON_NAME].config + self.assertTrue(mc.addon_profiles[CONST_MONITORING_ADDON_NAME].enabled) + self.assertIn("logAnalyticsWorkspaceResourceID", addon_config) + self.assertIn(CONST_MONITORING_USING_AAD_MSI_AUTH, addon_config) + self.assertNotIn("enableRetinaNetworkFlags", addon_config) + if __name__ == "__main__": unittest.main() From b1fa0dd5cf61f33ab0cbbaec07c92a0ef6774aa6 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Fri, 27 Mar 2026 10:20:36 +0000 Subject: [PATCH 24/32] Fix history --- src/aks-preview/HISTORY.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/aks-preview/HISTORY.rst b/src/aks-preview/HISTORY.rst index 062f9107c50..90a7e131af8 100644 --- a/src/aks-preview/HISTORY.rst +++ b/src/aks-preview/HISTORY.rst @@ -20,6 +20,11 @@ Pending * `az aks machine add`: Add `--enable-ultra-ssd` flag support to enable ultra ssd on a machine. * `az aks update`: Fix V2-only NAT gateway params (e.g. `--nat-gateway-managed-outbound-ipv6-count`) being rejected on update when `--outbound-type` is not re-specified for an already-V2 cluster. +20.0.0b2 ++++++++ +* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. +* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled. + 20.0.0b1 +++++++ * [Breaking Change] `az aks create/update`: Change `--nat-gateway-outbound-ips` and `--nat-gateway-outbound-ip-prefixes` to use comma-separated values, consistent with load balancer outbound IP parameters. @@ -45,11 +50,6 @@ Pending * `az aks namespace update`: Fix location should use existing namespace location. * `az aks nodepool update`: Add `--disable-artifact-streaming` to disable artifact streaming. -19.0.0b28 -+++++++ -* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. -* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled. - 19.0.0b27 +++++++ * `az aks nodepool add`: Fix `InvalidParameter` error when `mode` is `Machines`. From f21c0ea44421b65f38e75a016f5533c0a550094f Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Fri, 27 Mar 2026 10:53:01 +0000 Subject: [PATCH 25/32] Lint + fix unit test --- .../managed_cluster_decorator.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index c6e9924d47b..9bcd75cab7a 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -27,7 +27,6 @@ CONST_MANAGED_CLUSTER_SKU_TIER_FREE, CONST_MANAGED_CLUSTER_SKU_TIER_PREMIUM, CONST_MANAGED_CLUSTER_SKU_TIER_STANDARD, - CONST_MONITORING_ADDON_NAME_CAMELCASE, CONST_NETWORK_DATAPLANE_CILIUM, CONST_NETWORK_PLUGIN_AZURE, CONST_NETWORK_PLUGIN_MODE_OVERLAY, @@ -2857,7 +2856,6 @@ def _get_enable_opentelemetry_logs(self, enable_validation: bool = False) -> boo # 1. New API: azureMonitorProfile.containerInsights.enabled # 2. Legacy: addonProfiles.omsagent.enabled (or omsAgent with camelCase) addon_consts = self.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") # Check new API location container_insights_enabled = ( @@ -3009,7 +3007,6 @@ def get_enable_high_log_scale_mode(self) -> Union[bool, None]: # Validate that monitoring addon is enabled (either being enabled now or already enabled in cluster) addon_consts = self.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") # Check if monitoring is being enabled in the command enable_addons = self.raw_param.get("enable_addons") @@ -5538,8 +5535,11 @@ def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: ) elif self._should_create_dcra(): addon_consts = self.context.get_addon_consts() - monitoring_addon_key = _get_monitoring_addon_key_from_consts( - cluster.addon_profiles, addon_consts) if cluster.addon_profiles else addon_consts.get("CONST_MONITORING_ADDON_NAME") + monitoring_addon_key = ( + _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) + if cluster.addon_profiles + else addon_consts.get("CONST_MONITORING_ADDON_NAME") + ) self.context.external_functions.ensure_container_insights_for_monitoring( self.cmd, cluster.addon_profiles[monitoring_addon_key], @@ -5847,10 +5847,8 @@ def update_monitoring_profile_flow_logs(self, mc: ManagedCluster) -> ManagedClus """ self._ensure_mc(mc) - # Call the base class implementation if it exists (CLI >= 2.84.0 added this method to the base). - # This ensures any base-class monitoring handling (e.g., enable/disable) is applied first. - if hasattr(super(), 'update_monitoring_profile_flow_logs'): - mc = super().update_monitoring_profile_flow_logs(mc) + # Do not call super() — the base class HLSM validation does not handle + # preview-specific flags (--enable-azure-monitor-logs, --enable-addons monitoring). # Trigger validation for high log scale mode when container network logs are enabled. # This ensures proper error messages are raised before cluster update if the user @@ -8168,8 +8166,11 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: addon_consts = self.context.get_addon_consts() CONST_MONITORING_USING_AAD_MSI_AUTH = addon_consts.get("CONST_MONITORING_USING_AAD_MSI_AUTH") - monitoring_addon_key = _get_monitoring_addon_key_from_consts( - cluster.addon_profiles, addon_consts) if cluster.addon_profiles else addon_consts.get("CONST_MONITORING_ADDON_NAME") + monitoring_addon_key = ( + _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) + if cluster.addon_profiles + else addon_consts.get("CONST_MONITORING_ADDON_NAME") + ) if (cluster.addon_profiles and monitoring_addon_key in cluster.addon_profiles and From bb7ad7ef14a2d490434dc7ac2a3bfad24e0cd668 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 30 Mar 2026 16:09:02 +0100 Subject: [PATCH 26/32] Fix missing CONST_MONITORING_ADDON_NAME_CAMELCASE import --- src/aks-preview/azext_aks_preview/managed_cluster_decorator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 9bcd75cab7a..2f47ed6e160 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -54,6 +54,7 @@ CONST_ACNS_DATAPATH_ACCELERATION_MODE_NONE, CONST_TRANSIT_ENCRYPTION_TYPE_MTLS, CONST_ADVANCED_NETWORKPOLICIES_L7, + CONST_MONITORING_ADDON_NAME_CAMELCASE, ) from azext_aks_preview.azurecontainerstorage._consts import ( CONST_ACSTOR_EXT_INSTALLATION_NAME, From f81732b2b5c63cde296f581d05cb39122bc07724 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 31 Mar 2026 10:26:01 +0100 Subject: [PATCH 27/32] Fix tests + update history --- .../azext_aks_preview/tests/latest/test_aks_commands.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py index a03efba9c0d..38f2dcc7a06 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py @@ -19739,6 +19739,10 @@ def test_aks_update_standalone_enable_high_log_scale_mode( self.check("addonProfiles.omsagent.enabled", True), ]) + # Wait for any in-progress addon operations to complete before update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + # Update: enable high log scale mode standalone update_cmd = ( "aks update --resource-group={resource_group} --name={name} --yes " From 0c20eba52993ca7fc9e9618eacd7e5b381799039 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 1 Apr 2026 14:05:34 +0100 Subject: [PATCH 28/32] PR comments --- src/aks-preview/azext_aks_preview/custom.py | 2 +- .../managed_cluster_decorator.py | 156 ++++++++---------- .../latest/test_managed_cluster_decorator.py | 70 ++++---- 3 files changed, 114 insertions(+), 114 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index 891525c61e5..92fe98eb058 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -553,7 +553,7 @@ def __init__(self, location, resource_id): ) resources = get_resources_client(cmd.cli_ctx, cluster_subscription) - for attempt in range(3): + for _ in range(3): try: if enable_syslog: resources.begin_create_or_update_by_id( diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 2f47ed6e160..a48c96d0234 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -180,7 +180,6 @@ def _get_monitoring_addon_key_from_consts(addon_profiles, addon_consts): addon_consts.get("CONST_MONITORING_ADDON_NAME"), ) - # pylint: disable=too-few-public-methods class AKSPreviewManagedClusterModels(AKSManagedClusterModels): """Store the models used in aks series of commands. @@ -4679,8 +4678,8 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: mc.addon_profiles[CONST_MONITORING_ADDON_NAME] = addon_profile # DCR and DCRA creation is deferred to postprocessing_after_mc_created - # (_postprocess_monitoring_enable) so that all flags are finalized and - # the cluster exists. Only MSI clusters need a DCR. + # so that all flags are finalized and the cluster exists. + # Only MSI clusters need a DCR. self.context.set_intermediate("monitoring_addon_enabled", True, overwrite_exists=True) def _setup_opentelemetry_metrics(self, mc: ManagedCluster) -> None: @@ -5318,7 +5317,46 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: # monitoring addon monitoring_addon_enabled = self.context.get_intermediate("monitoring_addon_enabled", default_value=False) if monitoring_addon_enabled: - self._postprocess_monitoring_enable(cluster) + enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() + if not enable_msi_auth_for_monitoring: + # add cluster spn/msi Monitoring Metrics Publisher role assignment to publish metrics to MDM + # mdm metrics is supported only in azure public cloud, so add the role assignment only in this cloud + cloud_name = self.cmd.cli_ctx.cloud.name + if cloud_name.lower() == "azurecloud": + cluster_resource_id = resource_id( + subscription=self.context.get_subscription_id(), + resource_group=self.context.get_resource_group_name(), + namespace="Microsoft.ContainerService", + type="managedClusters", + name=self.context.get_name(), + ) + self.context.external_functions.add_monitoring_role_assignment( + cluster, cluster_resource_id, self.cmd + ) + elif self._should_create_dcra(): + addon_consts = self.context.get_addon_consts() + monitoring_addon_key = ( + _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) + if cluster.addon_profiles + else addon_consts.get("CONST_MONITORING_ADDON_NAME") + ) + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + cluster.addon_profiles[monitoring_addon_key], + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=False, + aad_route=self.context.get_enable_msi_auth_for_monitoring(), + create_dcr=True, + create_dcra=True, + enable_syslog=self.context.get_enable_syslog(), + data_collection_settings=self.context.get_data_collection_settings(), + is_private_cluster=self.context.get_enable_private_cluster(), + ampls_resource_id=self.context.get_ampls_resource_id(), + enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), + ) # Handle monitoring addon postprocessing (disable case) - same logic as aks_disable_addons monitoring_addon_disable_postprocessing_required = self.context.get_intermediate( @@ -5326,7 +5364,36 @@ def postprocessing_after_mc_created(self, cluster: ManagedCluster) -> None: ) if monitoring_addon_disable_postprocessing_required: - self._postprocess_monitoring_disable() + addon_consts = self.context.get_addon_consts() + CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") + + # Get the current cluster state to check config before it was disabled + current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) + + if (current_cluster.addon_profiles and + CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles): + + addon_profile = current_cluster.addon_profiles[CONST_MONITORING_ADDON_NAME] + + try: + self.context.external_functions.ensure_container_insights_for_monitoring( + self.cmd, + addon_profile, + self.context.get_subscription_id(), + self.context.get_resource_group_name(), + self.context.get_name(), + self.context.get_location(), + remove_monitoring=True, + aad_route=True, + create_dcr=False, + create_dcra=True, + enable_syslog=False, + data_collection_settings=None, + ampls_resource_id=None, + enable_high_log_scale_mode=False + ) + except TypeError: + pass # ingress appgw addon ingress_appgw_addon_enabled = self.context.get_intermediate("ingress_appgw_addon_enabled", default_value=False) @@ -5506,91 +5573,14 @@ def _should_create_dcra(self) -> bool: ) def _is_cnl_or_hlsm_changing(self) -> bool: - """Return True if any CNL or High Log Scale Mode flag was provided.""" + """Return True if any CNL or High Log Scale Mode enable flag was provided.""" params = self.context.raw_param return ( params.get("enable_container_network_logs") is not None or params.get("enable_retina_flow_logs") is not None or - params.get("disable_container_network_logs") is not None or - params.get("disable_retina_flow_logs") is not None or params.get("enable_high_log_scale_mode") is not None ) - def _postprocess_monitoring_enable(self, cluster: ManagedCluster) -> None: - """Handle monitoring addon postprocessing for the enable case.""" - enable_msi_auth_for_monitoring = self.context.get_enable_msi_auth_for_monitoring() - if not enable_msi_auth_for_monitoring: - # add cluster spn/msi Monitoring Metrics Publisher role assignment to publish metrics to MDM - # mdm metrics is supported only in azure public cloud, so add the role assignment only in this cloud - cloud_name = self.cmd.cli_ctx.cloud.name - if cloud_name.lower() == "azurecloud": - cluster_resource_id = resource_id( - subscription=self.context.get_subscription_id(), - resource_group=self.context.get_resource_group_name(), - namespace="Microsoft.ContainerService", - type="managedClusters", - name=self.context.get_name(), - ) - self.context.external_functions.add_monitoring_role_assignment( - cluster, cluster_resource_id, self.cmd - ) - elif self._should_create_dcra(): - addon_consts = self.context.get_addon_consts() - monitoring_addon_key = ( - _get_monitoring_addon_key_from_consts(cluster.addon_profiles, addon_consts) - if cluster.addon_profiles - else addon_consts.get("CONST_MONITORING_ADDON_NAME") - ) - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - cluster.addon_profiles[monitoring_addon_key], - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=False, - aad_route=self.context.get_enable_msi_auth_for_monitoring(), - create_dcr=True, - create_dcra=True, - enable_syslog=self.context.get_enable_syslog(), - data_collection_settings=self.context.get_data_collection_settings(), - is_private_cluster=self.context.get_enable_private_cluster(), - ampls_resource_id=self.context.get_ampls_resource_id(), - enable_high_log_scale_mode=self.context.get_enable_high_log_scale_mode(), - ) - - def _postprocess_monitoring_disable(self) -> None: - """Handle monitoring addon postprocessing for the disable case.""" - addon_consts = self.context.get_addon_consts() - CONST_MONITORING_ADDON_NAME = addon_consts.get("CONST_MONITORING_ADDON_NAME") - - # Get the current cluster state to check config before it was disabled - current_cluster = self.client.get(self.context.get_resource_group_name(), self.context.get_name()) - - if (current_cluster.addon_profiles and - CONST_MONITORING_ADDON_NAME in current_cluster.addon_profiles): - - addon_profile = current_cluster.addon_profiles[CONST_MONITORING_ADDON_NAME] - - try: - self.context.external_functions.ensure_container_insights_for_monitoring( - self.cmd, - addon_profile, - self.context.get_subscription_id(), - self.context.get_resource_group_name(), - self.context.get_name(), - self.context.get_location(), - remove_monitoring=True, - aad_route=True, - create_dcr=False, - create_dcra=True, - enable_syslog=False, - data_collection_settings=None, - ampls_resource_id=None, - enable_high_log_scale_mode=False - ) - except TypeError: - pass class AKSPreviewManagedClusterUpdateDecorator(AKSManagedClusterUpdateDecorator): diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index bb30e26666a..e5dbaa0f30e 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -8431,10 +8431,10 @@ def test_postprocessing_create_dcr_true_when_enable_retina_flow_logs(self): _, kwargs = mock_ecifm.call_args self.assertTrue(kwargs["create_dcr"]) - def test_postprocessing_create_dcr_true_when_disable_container_network_logs(self): - """create_dcr=True when disable_container_network_logs is set. + def test_postprocessing_no_dcr_when_disable_container_network_logs(self): + """ensure_container_insights is NOT called when only disable_container_network_logs is set. - disable_container_network_logs is in cnl_or_hlsm_changing, so create_dcr must be True. + Disable flags should not trigger DCR/DCRA creation. """ dec = self._make_postprocessing_decorator({"disable_container_network_logs": True}) cluster = self._make_cluster_with_monitoring() @@ -8443,14 +8443,12 @@ def test_postprocessing_create_dcr_true_when_disable_container_network_logs(self external_functions, "ensure_container_insights_for_monitoring", return_value=None ) as mock_ecifm: dec.postprocessing_after_mc_created(cluster) - mock_ecifm.assert_called_once() - _, kwargs = mock_ecifm.call_args - self.assertTrue(kwargs["create_dcr"]) + mock_ecifm.assert_not_called() - def test_postprocessing_create_dcr_true_when_disable_retina_flow_logs(self): - """create_dcr=True when the deprecated disable_retina_flow_logs flag is set. + def test_postprocessing_no_dcr_when_disable_retina_flow_logs(self): + """ensure_container_insights is NOT called when only disable_retina_flow_logs is set. - disable_retina_flow_logs is in cnl_or_hlsm_changing, so create_dcr must be True. + Disable flags should not trigger DCR/DCRA creation. """ dec = self._make_postprocessing_decorator({"disable_retina_flow_logs": True}) cluster = self._make_cluster_with_monitoring() @@ -8459,9 +8457,7 @@ def test_postprocessing_create_dcr_true_when_disable_retina_flow_logs(self): external_functions, "ensure_container_insights_for_monitoring", return_value=None ) as mock_ecifm: dec.postprocessing_after_mc_created(cluster) - mock_ecifm.assert_called_once() - _, kwargs = mock_ecifm.call_args - self.assertTrue(kwargs["create_dcr"]) + mock_ecifm.assert_not_called() def test_postprocessing_create_dcr_true_when_enable_high_log_scale_mode_true(self): """create_dcr=True when enable_high_log_scale_mode=True is set. @@ -8565,15 +8561,22 @@ def test_postprocessing_create_dcr_true_when_enable_azure_monitor_logs(self): # Tests for _should_create_dcra and _is_cnl_or_hlsm_changing helpers # ------------------------------------------------------------------ def test_is_cnl_or_hlsm_changing_true_for_cnl_flags(self): - """_is_cnl_or_hlsm_changing returns True when any CNL/HLSM flag is set.""" + """_is_cnl_or_hlsm_changing returns True when any CNL/HLSM enable flag is set.""" for param_name in [ "enable_container_network_logs", "enable_retina_flow_logs", + ]: + dec = self._make_postprocessing_decorator({param_name: True}) + self.assertTrue(dec._is_cnl_or_hlsm_changing(), f"Expected True for {param_name}") + + def test_is_cnl_or_hlsm_changing_false_for_disable_flags(self): + """_is_cnl_or_hlsm_changing returns False when only disable flags are set.""" + for param_name in [ "disable_container_network_logs", "disable_retina_flow_logs", ]: dec = self._make_postprocessing_decorator({param_name: True}) - self.assertTrue(dec._is_cnl_or_hlsm_changing(), f"Expected True for {param_name}") + self.assertFalse(dec._is_cnl_or_hlsm_changing(), f"Expected False for {param_name}") def test_is_cnl_or_hlsm_changing_true_for_hlsm_flag(self): dec = self._make_postprocessing_decorator({"enable_high_log_scale_mode": True}) @@ -8600,11 +8603,14 @@ def test_should_create_dcra_false_when_no_relevant_flags(self): self.assertFalse(dec._should_create_dcra()) # ------------------------------------------------------------------ - # Tests for _postprocess_monitoring_disable + # Tests for monitoring disable postprocessing (inlined in postprocessing_after_mc_created) # ------------------------------------------------------------------ - def test_postprocess_monitoring_disable_calls_ensure_container_insights(self): - """_postprocess_monitoring_disable calls ensure_container_insights with remove_monitoring=True.""" + def test_postprocessing_monitoring_disable_calls_ensure_container_insights(self): + """postprocessing_after_mc_created calls ensure_container_insights with remove_monitoring=True for disable.""" dec = self._make_postprocessing_decorator({}) + # Disable the enable path so only the disable path runs + dec.context.set_intermediate("monitoring_addon_enabled", False, overwrite_exists=True) + dec.context.set_intermediate("monitoring_addon_disable_postprocessing_required", True, overwrite_exists=True) # Mock client.get to return cluster with monitoring addon cluster_with_monitoring = self.models.ManagedCluster( location="test_location", @@ -8618,14 +8624,16 @@ def test_postprocess_monitoring_disable_calls_ensure_container_insights(self): with patch.object( external_functions, "ensure_container_insights_for_monitoring", return_value=None ) as mock_ecifm: - dec._postprocess_monitoring_disable() + dec.postprocessing_after_mc_created(cluster_with_monitoring) mock_ecifm.assert_called_once() _, kwargs = mock_ecifm.call_args self.assertTrue(kwargs["remove_monitoring"]) - def test_postprocess_monitoring_disable_no_addon_is_noop(self): - """_postprocess_monitoring_disable is a no-op when no monitoring addon on current cluster.""" + def test_postprocessing_monitoring_disable_no_addon_is_noop(self): + """postprocessing_after_mc_created disable path is a no-op when no monitoring addon on current cluster.""" dec = self._make_postprocessing_decorator({}) + dec.context.set_intermediate("monitoring_addon_enabled", False, overwrite_exists=True) + dec.context.set_intermediate("monitoring_addon_disable_postprocessing_required", True, overwrite_exists=True) cluster_without_monitoring = self.models.ManagedCluster(location="test_location") dec.client = Mock() dec.client.get = Mock(return_value=cluster_without_monitoring) @@ -8633,12 +8641,14 @@ def test_postprocess_monitoring_disable_no_addon_is_noop(self): with patch.object( external_functions, "ensure_container_insights_for_monitoring", return_value=None ) as mock_ecifm: - dec._postprocess_monitoring_disable() + dec.postprocessing_after_mc_created(cluster_without_monitoring) mock_ecifm.assert_not_called() - def test_postprocess_monitoring_disable_swallows_type_error(self): - """_postprocess_monitoring_disable should not raise on TypeError from ensure_container_insights.""" + def test_postprocessing_monitoring_disable_swallows_type_error(self): + """postprocessing_after_mc_created disable path should not raise on TypeError.""" dec = self._make_postprocessing_decorator({}) + dec.context.set_intermediate("monitoring_addon_enabled", False, overwrite_exists=True) + dec.context.set_intermediate("monitoring_addon_disable_postprocessing_required", True, overwrite_exists=True) cluster_with_monitoring = self.models.ManagedCluster( location="test_location", addon_profiles={ @@ -8652,7 +8662,7 @@ def test_postprocess_monitoring_disable_swallows_type_error(self): external_functions, "ensure_container_insights_for_monitoring", side_effect=TypeError("test") ): # Should not raise - dec._postprocess_monitoring_disable() + dec.postprocessing_after_mc_created(cluster_with_monitoring) # ------------------------------------------------------------------ # Tests for put_mc conditional postprocessing @@ -11741,14 +11751,14 @@ def test_setup_azure_monitor_logs_with_omsagent_camelcase(self): # Call _setup_azure_monitor_logs dec_1._setup_azure_monitor_logs(mc_1) - # Verify: The parent class normalizes addon keys to lowercase in-place, - # so "omsAgent" becomes "omsagent". The key point is no duplicate is created. - self.assertIn("omsagent", mc_1.addon_profiles) - self.assertNotIn("omsAgent", mc_1.addon_profiles) + # Verify: The existing key is preserved (no duplicate created). + # The implementation keeps the original casing ("omsAgent") found in addon_profiles. self.assertEqual(len([k for k in mc_1.addon_profiles if k.lower() == "omsagent"]), 1) # No duplicate - self.assertTrue(mc_1.addon_profiles["omsagent"].enabled) + # Find the actual key used (could be normalized or preserved depending on parent behavior) + actual_key = next(k for k in mc_1.addon_profiles if k.lower() == "omsagent") + self.assertTrue(mc_1.addon_profiles[actual_key].enabled) self.assertEqual( - mc_1.addon_profiles["omsagent"].config["logAnalyticsWorkspaceResourceID"], + mc_1.addon_profiles[actual_key].config["logAnalyticsWorkspaceResourceID"], "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/test-workspace" ) From 6dc32c476cf792e96feb9ac44c3ef5ce10fb48a1 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Wed, 1 Apr 2026 14:12:25 +0100 Subject: [PATCH 29/32] Fix lint --- src/aks-preview/azext_aks_preview/custom.py | 2 +- src/aks-preview/azext_aks_preview/managed_cluster_decorator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index 92fe98eb058..891525c61e5 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -553,7 +553,7 @@ def __init__(self, location, resource_id): ) resources = get_resources_client(cmd.cli_ctx, cluster_subscription) - for _ in range(3): + for attempt in range(3): try: if enable_syslog: resources.begin_create_or_update_by_id( diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index a48c96d0234..0da430be278 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -180,6 +180,7 @@ def _get_monitoring_addon_key_from_consts(addon_profiles, addon_consts): addon_consts.get("CONST_MONITORING_ADDON_NAME"), ) + # pylint: disable=too-few-public-methods class AKSPreviewManagedClusterModels(AKSManagedClusterModels): """Store the models used in aks series of commands. @@ -5582,7 +5583,6 @@ def _is_cnl_or_hlsm_changing(self) -> bool: ) - class AKSPreviewManagedClusterUpdateDecorator(AKSManagedClusterUpdateDecorator): def __init__( self, cmd: AzCliCommand, client: ContainerServiceClient, raw_parameters: Dict, resource_type: ResourceType From 7d9c8f804e44a66714c348771d9be4ed3ff9b245 Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Mon, 13 Apr 2026 11:53:25 +0100 Subject: [PATCH 30/32] Fix pending history --- src/aks-preview/HISTORY.rst | 6 ++++++ src/aks-preview/setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/aks-preview/HISTORY.rst b/src/aks-preview/HISTORY.rst index 90a7e131af8..a7f76cc54df 100644 --- a/src/aks-preview/HISTORY.rst +++ b/src/aks-preview/HISTORY.rst @@ -12,6 +12,11 @@ To release a new version, please select a new version number (usually plus 1 to Pending +++++++ +20.0.0b3 ++++++++ +* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. +* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled + 20.0.0b2 +++++++ * `az aks nodepool update`: clean up some useless code in the update managed gpu function. @@ -20,6 +25,7 @@ Pending * `az aks machine add`: Add `--enable-ultra-ssd` flag support to enable ultra ssd on a machine. * `az aks update`: Fix V2-only NAT gateway params (e.g. `--nat-gateway-managed-outbound-ipv6-count`) being rejected on update when `--outbound-type` is not re-specified for an already-V2 cluster. + 20.0.0b2 +++++++ * `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. diff --git a/src/aks-preview/setup.py b/src/aks-preview/setup.py index cbf33355898..d28112e3442 100644 --- a/src/aks-preview/setup.py +++ b/src/aks-preview/setup.py @@ -9,7 +9,7 @@ from setuptools import find_packages, setup -VERSION = "20.0.0b2" +VERSION = "20.0.0b3" CLASSIFIERS = [ "Development Status :: 4 - Beta", From dfc7b3f811af97eb5b66094a90cc564150111e2f Mon Sep 17 00:00:00 2001 From: carlotaarvela Date: Tue, 14 Apr 2026 16:24:59 +0100 Subject: [PATCH 31/32] PR comments --- src/aks-preview/azext_aks_preview/custom.py | 5 +- .../managed_cluster_decorator.py | 7 + .../latest/test_managed_cluster_decorator.py | 154 ++++++++++++++++++ 3 files changed, 162 insertions(+), 4 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index 891525c61e5..de9ae1a3ab8 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -553,7 +553,7 @@ def __init__(self, location, resource_id): ) resources = get_resources_client(cmd.cli_ctx, cluster_subscription) - for attempt in range(3): + for _ in range(3): try: if enable_syslog: resources.begin_create_or_update_by_id( @@ -571,9 +571,6 @@ def __init__(self, location, resource_id): break except (CLIError, HttpResponseError) as e: error = e - # Wait before retry to allow workspace tables to become available - if attempt < 2: - time.sleep(30) else: raise error diff --git a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py index 0da430be278..9a140ecb664 100644 --- a/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/managed_cluster_decorator.py @@ -7836,6 +7836,13 @@ def _setup_azure_monitor_logs(self, mc: ManagedCluster) -> None: if existing_key: addon_profile = mc.addon_profiles[existing_key] + # Detect workspace change: if the workspace is different from the existing one, + # trigger DCR postprocessing so the DCR destination gets updated. + old_config = addon_profile.config or {} + old_workspace = old_config.get(CONST_MONITORING_LOG_ANALYTICS_WORKSPACE_RESOURCE_ID, "") + if old_workspace and old_workspace.lower() != workspace_resource_id.lower(): + self.context.set_intermediate( + "monitoring_addon_postprocessing_required", True, overwrite_exists=True) else: addon_profile = self.models.ManagedClusterAddonProfile(enabled=False) existing_key = CONST_MONITORING_ADDON_NAME diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py index e5dbaa0f30e..3f8cd77ae37 100644 --- a/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_managed_cluster_decorator.py @@ -16340,6 +16340,160 @@ def test_setup_azure_monitor_logs_no_retina_flags_without_cnl(self): self.assertIsNotNone(addon_profile) self.assertNotIn("enableRetinaNetworkFlags", addon_profile.config) + # ------------------------------------------------------------------ + # Tests for _setup_azure_monitor_logs workspace change detection + # ------------------------------------------------------------------ + def test_setup_azure_monitor_logs_workspace_change_triggers_postprocessing(self): + """_setup_azure_monitor_logs sets monitoring_addon_postprocessing_required when workspace changes.""" + old_ws = "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/old-ws" + new_ws = "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/new-ws" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "workspace_resource_id": new_ws, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": old_ws, + "useAADAuth": "true", + }, + ) + }, + ) + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test-subscription-id") + + with patch.object( + dec.context.external_functions, + "sanitize_loganalytics_ws_resource_id", + side_effect=lambda x: x, + ): + dec._setup_azure_monitor_logs(mc) + + self.assertTrue( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + # Verify workspace was updated + actual_key = next(k for k in mc.addon_profiles if k.lower() == "omsagent") + self.assertEqual(mc.addon_profiles[actual_key].config["logAnalyticsWorkspaceResourceID"], new_ws) + + def test_setup_azure_monitor_logs_same_workspace_no_postprocessing(self): + """_setup_azure_monitor_logs does NOT set monitoring_addon_postprocessing_required when workspace is unchanged.""" + ws = "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/same-ws" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "workspace_resource_id": ws, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": ws, + "useAADAuth": "true", + }, + ) + }, + ) + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test-subscription-id") + + with patch.object( + dec.context.external_functions, + "sanitize_loganalytics_ws_resource_id", + side_effect=lambda x: x, + ): + dec._setup_azure_monitor_logs(mc) + + self.assertFalse( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + def test_setup_azure_monitor_logs_workspace_change_case_insensitive(self): + """_setup_azure_monitor_logs compares workspaces case-insensitively (no false positives on casing).""" + ws_lower = "/subscriptions/test/resourcegroups/test/providers/microsoft.operationalinsights/workspaces/my-ws" + ws_mixed = "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/my-ws" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "workspace_resource_id": ws_mixed, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={ + CONST_MONITORING_ADDON_NAME: self.models.ManagedClusterAddonProfile( + enabled=True, + config={ + "logAnalyticsWorkspaceResourceID": ws_lower, + "useAADAuth": "true", + }, + ) + }, + ) + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test-subscription-id") + + with patch.object( + dec.context.external_functions, + "sanitize_loganalytics_ws_resource_id", + side_effect=lambda x: x, + ): + dec._setup_azure_monitor_logs(mc) + + # Same workspace (different casing) should NOT trigger postprocessing + self.assertFalse( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + + def test_setup_azure_monitor_logs_new_addon_no_postprocessing(self): + """_setup_azure_monitor_logs does NOT trigger postprocessing when there is no existing addon (fresh enable).""" + new_ws = "/subscriptions/test/resourceGroups/test/providers/Microsoft.OperationalInsights/workspaces/new-ws" + dec = AKSPreviewManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_logs": True, + "workspace_resource_id": new_ws, + }, + CUSTOM_MGMT_AKS_PREVIEW, + ) + mc = self.models.ManagedCluster( + location="test_location", + addon_profiles={}, + ) + dec.context.attach_mc(mc) + dec.context.set_intermediate("subscription_id", "test-subscription-id") + + with patch.object( + dec.context.external_functions, + "sanitize_loganalytics_ws_resource_id", + side_effect=lambda x: x, + ): + dec._setup_azure_monitor_logs(mc) + + # Fresh enable — no old workspace to compare, should NOT trigger postprocessing + self.assertFalse( + dec.context.get_intermediate("monitoring_addon_postprocessing_required", default_value=False) + ) + # ------------------------------------------------------------------ # Tests for _disable_azure_monitor_logs disabling containerInsights # ------------------------------------------------------------------ From 8b869cca61fbed2368605a26d62fecfc83693abe Mon Sep 17 00:00:00 2001 From: FumingZhang <81607949+FumingZhang@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:41:32 +1000 Subject: [PATCH 32/32] Apply suggestions from code review Co-authored-by: FumingZhang <81607949+FumingZhang@users.noreply.github.com> --- src/aks-preview/HISTORY.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/aks-preview/HISTORY.rst b/src/aks-preview/HISTORY.rst index a7f76cc54df..01e34dfc072 100644 --- a/src/aks-preview/HISTORY.rst +++ b/src/aks-preview/HISTORY.rst @@ -25,12 +25,6 @@ Pending * `az aks machine add`: Add `--enable-ultra-ssd` flag support to enable ultra ssd on a machine. * `az aks update`: Fix V2-only NAT gateway params (e.g. `--nat-gateway-managed-outbound-ipv6-count`) being rejected on update when `--outbound-type` is not re-specified for an already-V2 cluster. - -20.0.0b2 -+++++++ -* `az aks create/update`: Fix DCR not being created or updated when `--enable-container-network-logs`, `--enable-retina-flow-logs`, or `--enable-high-log-scale-mode` flags are used, ensuring the Data Collection Rule streams (e.g. `Microsoft-ContainerLogV2-HighScale`) are kept in sync. -* `az aks update`: Add validation for `--enable-high-log-scale-mode` on the update path requiring the monitoring addon with MSI authentication to be enabled. - 20.0.0b1 +++++++ * [Breaking Change] `az aks create/update`: Change `--nat-gateway-outbound-ips` and `--nat-gateway-outbound-ip-prefixes` to use comma-separated values, consistent with load balancer outbound IP parameters.