From a6c3d47f98ec1eeee02982e17249deece03a34cc Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 03:12:35 +0000 Subject: [PATCH 01/11] add high log scale support for ARC Cli --- src/k8s-extension/HISTORY.rst | 4 ++ .../partner_extensions/ContainerInsights.py | 59 ++++++++++++++++++- src/k8s-extension/setup.py | 2 +- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index 09a13348d04..88258060a02 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -3,6 +3,10 @@ Release History =============== +1.6.6 +++++++++ +* microsoft.azuremonitor.containers: Extend ContainerInsights Extension for high log scale mode and amplify support. + 1.6.5 ++++++++++++++++++ * microsoft.dataprotection.kubernetes: Add support for 'DisableInformerCache' configuration. diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index b059850d7b4..866fcf3929d 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -520,6 +520,14 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r raise InvalidArgumentValueError('streams must be an array type') extensionSettings["dataCollectionSettings"] = dataCollectionSettings + if useAADAuth and 'amalogs.enableHighLogScaleMode' in configuration_settings: + enableHighLogScaleMode = configuration_settings['amalogs.enableHighLogScaleMode'] + if isinstance(enableHighLogScaleMode, str): + enableHighLogScaleMode = enableHighLogScaleMode.lower() + if enableHighLogScaleMode not in ["true", "false"]: + raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false') + extensionSettings["enableHighLogScaleMode"] = enableHighLogScaleMode + workspace_resource_id 
= workspace_resource_id.strip() if configuration_protected_settings is not None: @@ -548,7 +556,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r if is_ci_extension_type: if useAADAuth: logger.info("creating data collection rule and association") - _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings) + _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enableHighLogScaleMode) elif not _is_container_insights_solution_exists(cmd, workspace_resource_id): logger.info("Creating ContainerInsights solution resource, since it doesn't exist and it is using legacy authentication") _ensure_container_insights_for_monitoring(cmd, workspace_resource_id).result() @@ -617,7 +625,7 @@ def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url): return tags -def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings): +def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enable_high_log_scale_mode): from azure.core.exceptions import HttpResponseError cluster_region = '' @@ -652,6 +660,18 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ dataCollectionRuleName = dataCollectionRuleName[0:64] dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}" + # ingestion DCE MUST be in workspace region + ingestionDataCollectionEndpointName = f"MSCI-ingest-{workspace_region}-{cluster_name}" + # Max length 
of the DCE name is 44 chars + ingestionDataCollectionEndpointName = _trim_suffix_if_needed(ingestionDataCollectionEndpointName[0:43]) + ingestion_dce_resource_id = None + + # create ingestion DCE if high log scale mode enabled + if enable_high_log_scale_mode: + ingestion_dce_resource_id = create_data_collection_endpoint( + cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName + ) + # first get the association between region display names and region IDs (because for some reason # the "which RPs are available in which regions" check returns region display names) region_names_to_id = {} @@ -691,6 +711,11 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ } extensionSettings["dataCollectionSettings"] = dataCollectionSettings + if enable_high_log_scale_mode: + for i, v in enumerate(streams): + if v == "Microsoft-ContainerLogV2": + streams[i] = "Microsoft-ContainerLogV2-HighScale" + # create the DCR dcr_creation_body = json.dumps( { @@ -722,6 +747,7 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ } ] }, + "dataCollectionEndpointId": ingestion_dce_resource_id }, } ) @@ -755,3 +781,32 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ error = e else: raise error + +def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName): + # create the ingestion DCE + ingestion_dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{ingestionDataCollectionEndpointName}" + ingestion_dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{ingestion_dce_resource_id}?api-version=2022-06-01" + ingestion_dce_creation_body = json.dumps({ + "location": workspace_region, + "kind": "Linux", + "properties": { + "networkAcls": { + "publicNetworkAccess": "Enabled" + } + 
} + }) + for _ in range(3): + try: + send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body) + error = None + break + except AzCLIError as e: + error = e + else: + raise error + return ingestion_dce_resource_id + +def _trim_suffix_if_needed(s, suffix="-"): + if s.endswith(suffix): + s = s[:-len(suffix)] + return s \ No newline at end of file diff --git a/src/k8s-extension/setup.py b/src/k8s-extension/setup.py index c46e1aca890..bdee2be8241 100644 --- a/src/k8s-extension/setup.py +++ b/src/k8s-extension/setup.py @@ -33,7 +33,7 @@ # TODO: Add any additional SDK dependencies here DEPENDENCIES = [] -VERSION = "1.6.5" +VERSION = "1.6.6" with open("README.rst", "r", encoding="utf-8") as f: README = f.read() From 0e31878754847ef5f20890b906ca0f430950a798 Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 03:14:15 +0000 Subject: [PATCH 02/11] add high log scale support for ARC Cli --- src/k8s-extension/HISTORY.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index 88258060a02..94df11be9ce 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -5,7 +5,7 @@ Release History 1.6.6 ++++++++ -* microsoft.azuremonitor.containers: Extend ContainerInsights Extension for high log scale mode and amplify support. +* microsoft.azuremonitor.containers: Extend ContainerInsights Extension for high log scale mode support. 
1.6.5 ++++++++++++++++++ From 5a24c8cb8d7a4762e7a4b084bdd98805f0b535b3 Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 05:23:56 +0000 Subject: [PATCH 03/11] update add stream --- .../partner_extensions/ContainerInsights.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index 866fcf3929d..cbf00705fa9 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -33,6 +33,20 @@ logger = get_logger(__name__) DCR_API_VERSION = "2022-06-01" +ContainerInsightsStreams = [ + "Microsoft-ContainerLog", + "Microsoft-ContainerLogV2-HighScale", + "Microsoft-KubeEvents", + "Microsoft-KubePodInventory", + "Microsoft-KubeNodeInventory", + "Microsoft-KubePVInventory", + "Microsoft-KubeServices", + "Microsoft-KubeMonAgentEvents", + "Microsoft-InsightsMetrics", + "Microsoft-ContainerInventory", + "Microsoft-ContainerNodeInventory", + "Microsoft-Perf", +] class ContainerInsights(DefaultExtension): def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_type, cluster_rp, @@ -523,10 +537,12 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r if useAADAuth and 'amalogs.enableHighLogScaleMode' in configuration_settings: enableHighLogScaleMode = configuration_settings['amalogs.enableHighLogScaleMode'] if isinstance(enableHighLogScaleMode, str): - enableHighLogScaleMode = enableHighLogScaleMode.lower() - if enableHighLogScaleMode not in ["true", "false"]: + enableHighLogScaleMode_str = enableHighLogScaleMode.lower() + if enableHighLogScaleMode_str not in ["true", "false"]: + raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false') + enableHighLogScaleMode = 
(enableHighLogScaleMode_str == "true") + elif not isinstance(enableHighLogScaleMode, bool): raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false') - extensionSettings["enableHighLogScaleMode"] = enableHighLogScaleMode workspace_resource_id = workspace_resource_id.strip() @@ -697,6 +713,8 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ # get existing tags on the container insights extension DCR if the customer added any existing_tags = get_existing_container_insights_extension_dcr_tags(cmd, dcr_url) streams = ["Microsoft-ContainerInsights-Group-Default"] + if enable_high_log_scale_mode: + streams = ContainerInsightsStreams if extensionSettings is None: extensionSettings = {} if 'dataCollectionSettings' in extensionSettings.keys(): From 8f1c690981b3e6cefb0b25f4ea3e8d0943905860 Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 06:39:06 +0000 Subject: [PATCH 04/11] update --- .../partner_extensions/ContainerInsights.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index cbf00705fa9..269c65ec948 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -48,6 +48,7 @@ "Microsoft-Perf", ] + class ContainerInsights(DefaultExtension): def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_type, cluster_rp, extension_type, scope, auto_upgrade_minor_version, release_train, version, target_namespace, @@ -800,6 +801,7 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ else: raise error + def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName): # 
create the ingestion DCE ingestion_dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{ingestionDataCollectionEndpointName}" @@ -824,7 +826,8 @@ def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group raise error return ingestion_dce_resource_id + def _trim_suffix_if_needed(s, suffix="-"): if s.endswith(suffix): s = s[:-len(suffix)] - return s \ No newline at end of file + return s From 5f9ead18c23159ec7a4665b29d50d82d219610d3 Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 18:00:54 +0000 Subject: [PATCH 05/11] add test --- .../latest/test_k8s_extension_scenario.py | 94 ++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py b/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py index c0436d709fb..fb98dc2312b 100644 --- a/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py +++ b/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py @@ -6,12 +6,11 @@ # pylint: disable=line-too-long import os +import json from azure.cli.testsdk import (ScenarioTest, record_only) - TEST_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), '..')) - class K8sExtensionScenarioTest(ScenarioTest): @record_only() def test_k8s_extension(self): @@ -60,3 +59,94 @@ def test_k8s_extension(self): found_extension = True break self.assertFalse(found_extension) + + +class ContainerInsightsExtensionTest(ScenarioTest): + @record_only() + def test_container_insights_high_log_scale(self): + self.kwargs.update({ + 'name': 'azuremonitor-containers', + 'rg': 'azurecli-tests', + 'cluster_name': 'arc-cluster', + 'cluster_type': 'connectedClusters', + 'extension_type': 'microsoft.azuremonitor.containers', + 'config_settings': json.dumps({ + 'amalogs.useAADAuth': 'true', + 
'amalogs.enableHighLogScaleMode': 'true', + 'dataCollectionSettings': json.dumps({ + 'interval': '1m', + 'enableContainerLogV2': True, + 'streams': ['Microsoft-ContainerLogV2'] + }) + }) + }) + + # Test creating extension with high log scale enabled + result = self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' + '--extension-type {extension_type} --configuration-settings {config_settings}').get_output_in_json() + + # Verify the extension was created successfully + self.assertEqual(result['name'], self.kwargs['name']) + self.assertEqual(result['extensionType'], self.kwargs['extension_type']) + + # Verify high log scale mode settings were applied + config_settings = result.get('configurationSettings', {}) + self.assertEqual(config_settings.get('amalogs.enableHighLogScaleMode'), 'true') + self.assertEqual(config_settings.get('amalogs.useAADAuth'), 'true') + + # Cleanup + self.cmd('k8s-extension delete -g {rg} -c {cluster_name} -n {name} --cluster-type {cluster_type} --force -y') + + @record_only() + def test_container_insights_invalid_high_log_scale(self): + self.kwargs.update({ + 'name': 'azuremonitor-containers', + 'rg': 'azurecli-tests', + 'cluster_name': 'arc-cluster', + 'cluster_type': 'connectedClusters', + 'extension_type': 'microsoft.azuremonitor.containers', + 'config_settings': json.dumps({ + 'amalogs.useAADAuth': 'true', + 'amalogs.enableHighLogScaleMode': 'invalid' # Invalid value + }) + }) + + # Test that invalid high log scale mode value is rejected + with self.assertRaisesRegexp(Exception, 'amalogs.enableHighLogScaleMode value MUST be either true or false'): + self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' + '--extension-type {extension_type} --configuration-settings {config_settings}') + + @record_only() + def test_container_insights_high_log_scale_streams(self): + self.kwargs.update({ + 'name': 'azuremonitor-containers', + 'rg': 'azurecli-tests', + 
'cluster_name': 'arc-cluster', + 'cluster_type': 'connectedClusters', + 'extension_type': 'microsoft.azuremonitor.containers', + 'config_settings': json.dumps({ + 'amalogs.useAADAuth': 'true', + 'amalogs.enableHighLogScaleMode': 'true', + 'dataCollectionSettings': json.dumps({ + 'interval': '1m', + 'enableContainerLogV2': True, + 'streams': ['Microsoft-ContainerLogV2', 'Microsoft-ContainerLog'] + }) + }) + }) + + # Test creating extension with high log scale enabled and multiple streams + result = self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' + '--extension-type {extension_type} --configuration-settings {config_settings}').get_output_in_json() + + # Verify the extension was created successfully + self.assertEqual(result['name'], self.kwargs['name']) + + # Verify stream configuration was modified correctly (ContainerLogV2 should become ContainerLogV2-HighScale) + data_settings = json.loads(json.loads(result['configurationSettings']['dataCollectionSettings'])) + streams = data_settings.get('streams', []) + self.assertIn('Microsoft-ContainerLogV2-HighScale', streams) + self.assertNotIn('Microsoft-ContainerLogV2', streams) + + # Cleanup + self.cmd('k8s-extension delete -g {rg} -c {cluster_name} -n {name} --cluster-type {cluster_type} --force -y') From 4b91ad5864bd96cbb93701155d7a6afab3628b1c Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 18:44:12 +0000 Subject: [PATCH 06/11] update test --- ...est_container_insights_high_log_scale.yaml | 49 ++++++++++++ ...ainer_insights_high_log_scale_streams.yaml | 49 ++++++++++++ .../latest/test_k8s_extension_scenario.py | 80 +++++++------------ 3 files changed, 125 insertions(+), 53 deletions(-) create mode 100644 src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale.yaml create mode 100644 src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale_streams.yaml diff --git 
a/src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale.yaml b/src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale.yaml new file mode 100644 index 00000000000..f19e4cd2a41 --- /dev/null +++ b/src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale.yaml @@ -0,0 +1,49 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + CommandName: + - k8s-extension create + Connection: + - keep-alive + ParameterSetName: + - -g -n -c --cluster-type --extension-type --configuration-settings + User-Agent: + - AZURECLI/2.75.0 azsdk-python-core/1.31.0 Python/3.10.12 (Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.35) + method: GET + uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/azurecli-tests/providers/Microsoft.Kubernetes/connectedClusters/arc-cluster?api-version=2020-01-01-preview + response: + body: + string: '{"error":{"code":"ResourceGroupNotFound","message":"Resource group + ''azurecli-tests'' could not be found."}}' + headers: + cache-control: + - no-cache + content-length: + - '106' + content-type: + - application/json; charset=utf-8 + date: + - Tue, 01 Jul 2025 18:42:31 GMT + expires: + - '-1' + pragma: + - no-cache + strict-transport-security: + - max-age=31536000; includeSubDomains + x-cache: + - CONFIG_NOCACHE + x-content-type-options: + - nosniff + x-ms-failure-cause: + - gateway + x-msedge-ref: + - 'Ref A: 43ED95238AFF4CEA964D74A2529E4C51 Ref B: BL2AA2011005060 Ref C: 2025-07-01T18:42:32Z' + status: + code: 404 + message: Not Found +version: 1 diff --git a/src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale_streams.yaml b/src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale_streams.yaml new file mode 
100644 index 00000000000..4f45cd12fd2 --- /dev/null +++ b/src/k8s-extension/azext_k8s_extension/tests/latest/recordings/test_container_insights_high_log_scale_streams.yaml @@ -0,0 +1,49 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + CommandName: + - k8s-extension create + Connection: + - keep-alive + ParameterSetName: + - -g -n -c --cluster-type --extension-type --configuration-settings + User-Agent: + - AZURECLI/2.75.0 azsdk-python-core/1.31.0 Python/3.10.12 (Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.35) + method: GET + uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/azurecli-tests/providers/Microsoft.Kubernetes/connectedClusters/arc-cluster?api-version=2020-01-01-preview + response: + body: + string: '{"error":{"code":"ResourceGroupNotFound","message":"Resource group + ''azurecli-tests'' could not be found."}}' + headers: + cache-control: + - no-cache + content-length: + - '106' + content-type: + - application/json; charset=utf-8 + date: + - Tue, 01 Jul 2025 18:42:31 GMT + expires: + - '-1' + pragma: + - no-cache + strict-transport-security: + - max-age=31536000; includeSubDomains + x-cache: + - CONFIG_NOCACHE + x-content-type-options: + - nosniff + x-ms-failure-cause: + - gateway + x-msedge-ref: + - 'Ref A: BEEC6FA3AAA44D3B923946D0F5FF610C Ref B: BL2AA2011006062 Ref C: 2025-07-01T18:42:32Z' + status: + code: 404 + message: Not Found +version: 1 diff --git a/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py b/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py index fb98dc2312b..53e2482abbc 100644 --- a/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py +++ b/src/k8s-extension/azext_k8s_extension/tests/latest/test_k8s_extension_scenario.py @@ -64,38 +64,25 @@ def test_k8s_extension(self): class 
ContainerInsightsExtensionTest(ScenarioTest): @record_only() def test_container_insights_high_log_scale(self): + from azure.core.exceptions import ResourceNotFoundError + self.kwargs.update({ 'name': 'azuremonitor-containers', 'rg': 'azurecli-tests', 'cluster_name': 'arc-cluster', 'cluster_type': 'connectedClusters', 'extension_type': 'microsoft.azuremonitor.containers', - 'config_settings': json.dumps({ - 'amalogs.useAADAuth': 'true', - 'amalogs.enableHighLogScaleMode': 'true', - 'dataCollectionSettings': json.dumps({ - 'interval': '1m', - 'enableContainerLogV2': True, - 'streams': ['Microsoft-ContainerLogV2'] - }) - }) + 'config_settings': '"amalogs.useAADAuth=true amalogs.enableHighLogScaleMode=true ' + + 'dataCollectionSettings=' + json.dumps({ + 'interval': '1m', + 'enableContainerLogV2': True, + 'streams': ['Microsoft-ContainerLogV2'] + }) + '"' }) - # Test creating extension with high log scale enabled - result = self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' - '--extension-type {extension_type} --configuration-settings {config_settings}').get_output_in_json() - - # Verify the extension was created successfully - self.assertEqual(result['name'], self.kwargs['name']) - self.assertEqual(result['extensionType'], self.kwargs['extension_type']) - - # Verify high log scale mode settings were applied - config_settings = result.get('configurationSettings', {}) - self.assertEqual(config_settings.get('amalogs.enableHighLogScaleMode'), 'true') - self.assertEqual(config_settings.get('amalogs.useAADAuth'), 'true') - - # Cleanup - self.cmd('k8s-extension delete -g {rg} -c {cluster_name} -n {name} --cluster-type {cluster_type} --force -y') + with self.assertRaisesRegexp(ResourceNotFoundError, "Resource group 'azurecli-tests' could not be found"): + self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' + '--extension-type {extension_type} --configuration-settings 
{config_settings}') @record_only() def test_container_insights_invalid_high_log_scale(self): @@ -104,49 +91,36 @@ def test_container_insights_invalid_high_log_scale(self): 'rg': 'azurecli-tests', 'cluster_name': 'arc-cluster', 'cluster_type': 'connectedClusters', - 'extension_type': 'microsoft.azuremonitor.containers', - 'config_settings': json.dumps({ - 'amalogs.useAADAuth': 'true', - 'amalogs.enableHighLogScaleMode': 'invalid' # Invalid value - }) + 'extension_type': 'microsoft.azuremonitor.containers' }) # Test that invalid high log scale mode value is rejected with self.assertRaisesRegexp(Exception, 'amalogs.enableHighLogScaleMode value MUST be either true or false'): self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' - '--extension-type {extension_type} --configuration-settings {config_settings}') + '--extension-type {extension_type} ' + '--configuration-settings ' + 'amalogs.useAADAuth=true ' + 'amalogs.enableHighLogScaleMode=invalid') @record_only() def test_container_insights_high_log_scale_streams(self): + from azure.core.exceptions import ResourceNotFoundError + self.kwargs.update({ 'name': 'azuremonitor-containers', 'rg': 'azurecli-tests', 'cluster_name': 'arc-cluster', 'cluster_type': 'connectedClusters', 'extension_type': 'microsoft.azuremonitor.containers', - 'config_settings': json.dumps({ - 'amalogs.useAADAuth': 'true', - 'amalogs.enableHighLogScaleMode': 'true', - 'dataCollectionSettings': json.dumps({ - 'interval': '1m', - 'enableContainerLogV2': True, - 'streams': ['Microsoft-ContainerLogV2', 'Microsoft-ContainerLog'] - }) - }) + 'config_settings': '"amalogs.useAADAuth=true amalogs.enableHighLogScaleMode=true ' + + 'dataCollectionSettings=' + json.dumps({ + 'interval': '1m', + 'enableContainerLogV2': True, + 'streams': ['Microsoft-ContainerLogV2', 'Microsoft-ContainerLog'] + }) + '"' }) # Test creating extension with high log scale enabled and multiple streams - result = self.cmd('k8s-extension 
create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' - '--extension-type {extension_type} --configuration-settings {config_settings}').get_output_in_json() - - # Verify the extension was created successfully - self.assertEqual(result['name'], self.kwargs['name']) - - # Verify stream configuration was modified correctly (ContainerLogV2 should become ContainerLogV2-HighScale) - data_settings = json.loads(json.loads(result['configurationSettings']['dataCollectionSettings'])) - streams = data_settings.get('streams', []) - self.assertIn('Microsoft-ContainerLogV2-HighScale', streams) - self.assertNotIn('Microsoft-ContainerLogV2', streams) - - # Cleanup - self.cmd('k8s-extension delete -g {rg} -c {cluster_name} -n {name} --cluster-type {cluster_type} --force -y') + with self.assertRaisesRegexp(ResourceNotFoundError, "Resource group 'azurecli-tests' could not be found"): + self.cmd('k8s-extension create -g {rg} -n {name} -c {cluster_name} --cluster-type {cluster_type} ' + '--extension-type {extension_type} --configuration-settings {config_settings}') From 25c6f7249ed0dff62be73c6e5a2e43c5e9bbefdc Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 21:15:06 +0000 Subject: [PATCH 07/11] add default value --- .../partner_extensions/ContainerInsights.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index 269c65ec948..9c1f2ad856d 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -479,6 +479,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r subscription_id = get_subscription_id(cmd.cli_ctx) workspace_resource_id = '' useAADAuth = True + enableHighLogScaleMode = False # Default value if 
'amalogs.useAADAuth' not in configuration_settings: configuration_settings['amalogs.useAADAuth'] = "true" extensionSettings = {} @@ -815,16 +816,17 @@ def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group } } }) + last_error = None for _ in range(3): try: - send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body) - error = None - break + response = send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body) + return ingestion_dce_resource_id except AzCLIError as e: - error = e - else: - raise error - return ingestion_dce_resource_id + last_error = e + continue + + # If we get here, all retries failed + raise CLIError(f"Failed to create data collection endpoint after 3 retries. Last error: {str(last_error)}") def _trim_suffix_if_needed(s, suffix="-"): From 3adc49869a93b3c582771c9889b29698e5f50d3b Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 21:16:47 +0000 Subject: [PATCH 08/11] update --- .../partner_extensions/ContainerInsights.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index 9c1f2ad856d..b773382580d 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -816,17 +816,16 @@ def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group } } }) - last_error = None for _ in range(3): try: - response = send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body) - return ingestion_dce_resource_id + send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body) + error = None + break except AzCLIError as e: - last_error = e - continue - - # If we get here, all retries failed - 
raise CLIError(f"Failed to create data collection endpoint after 3 retries. Last error: {str(last_error)}") + error = e + else: + raise error + return ingestion_dce_resource_id def _trim_suffix_if_needed(s, suffix="-"): From 259afd72380f9d9b27326e773b89e6ffd48676e0 Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 1 Jul 2025 21:35:19 +0000 Subject: [PATCH 09/11] update --- .../partner_extensions/ContainerInsights.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index b773382580d..8d0e41768e3 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -680,7 +680,7 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_ # ingestion DCE MUST be in workspace region ingestionDataCollectionEndpointName = f"MSCI-ingest-{workspace_region}-{cluster_name}" - # Max length of the DCE name is 44 chars + # Max length of the DCE name is 43 chars ingestionDataCollectionEndpointName = _trim_suffix_if_needed(ingestionDataCollectionEndpointName[0:43]) ingestion_dce_resource_id = None @@ -816,15 +816,15 @@ def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group } } }) + error = None for _ in range(3): try: send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body) - error = None - break + return ingestion_dce_resource_id except AzCLIError as e: error = e - else: - raise error + if error: + raise error return ingestion_dce_resource_id From 003f3679212af123c456403443ec557aa75057c2 Mon Sep 17 00:00:00 2001 From: longwan Date: Wed, 2 Jul 2025 02:29:39 +0000 Subject: [PATCH 10/11] clean dcr and dce when delete --- .../partner_extensions/ContainerInsights.py | 63 +++++++++++++++++++ 1 file changed, 63 
insertions(+) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py index 8d0e41768e3..5cf74793808 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py @@ -98,6 +98,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t # Delete DCR-A if it exists incase of MSI Auth useAADAuth = False isDCRAExists = False + enable_high_log_scale_mode = False cluster_rp, _ = get_cluster_rp_api_version(cluster_type=cluster_type, cluster_rp=cluster_rp) try: extension = client.get(resource_group_name, cluster_rp, cluster_type, cluster_name, name) @@ -110,6 +111,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t return subscription_id = get_subscription_id(cmd.cli_ctx) + resources = cf_resources(cmd.cli_ctx, subscription_id) # handle cluster type here cluster_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/{2}/{3}/{4}'.format(subscription_id, resource_group_name, cluster_rp, cluster_type, cluster_name) if (extension is not None) and (extension.configuration_settings is not None): @@ -123,6 +125,14 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t useAADAuthSetting = configSettings['amalogs.useAADAuth'] if (isinstance(useAADAuthSetting, str) and str(useAADAuthSetting).lower() == "true") or (isinstance(useAADAuthSetting, bool) and useAADAuthSetting): useAADAuth = True + + # Check if high log scale mode was enabled + if useAADAuth and 'amalogs.enableHighLogScaleMode' in configSettings: + highLogScaleSetting = configSettings['amalogs.enableHighLogScaleMode'] + if isinstance(highLogScaleSetting, str): + enable_high_log_scale_mode = (highLogScaleSetting.lower() == "true") + elif isinstance(highLogScaleSetting, bool): + enable_high_log_scale_mode 
= highLogScaleSetting if useAADAuth: association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version={DCR_API_VERSION}" for _ in range(3): @@ -146,6 +156,28 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t except Exception: pass # its OK to ignore the exception since MSI auth in preview + if useAADAuth: + resource = resources.get_by_id(cluster_resource_id, '2020-01-01-preview') + cluster_location = resource.location.lower() + dcr_name = f"MSCI-{cluster_location}-{cluster_name}" + dcr_name = dcr_name[0:64] + + dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dcr_name}" + dcr_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dcr_resource_id}?api-version={DCR_API_VERSION}" + response = send_raw_request(cmd.cli_ctx, "GET", dcr_url) + dcr_config = json.loads(response.text) + # Delete the DCR + for _ in range(3): + try: + send_raw_request(cmd.cli_ctx, "DELETE", dcr_url,) + logger.info(f"Successfully deleted DCR: {dcr_name}") + break + except Exception as ex: + logger.warning(f"Error deleting DCR: {str(ex)}") + pass + + if enable_high_log_scale_mode: + _delete_dce_for_dcr(cmd, subscription_id, resource_group_name, dcr_config) # Custom Validation Logic for Container Insights @@ -623,6 +655,53 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r configuration_settings['amalogs.domain'] = 'opinsights.azure.microsoft.scloud'
+def _delete_dce_for_dcr(cmd, subscription_id, cluster_resource_group_name, dcr_config):
+    """Delete the Data Collection Endpoint (DCE) referenced by a DCR, if any.
+
+    The DCE name is the last path segment of the DCR's
+    ``properties.dataCollectionEndpointId``. The DELETE targets a DCE of that
+    name in ``cluster_resource_group_name`` under ``subscription_id``; the
+    referenced ID is not used verbatim.
+
+    :param cmd: CLI command object; its ``cli_ctx`` is used for the ARM request.
+    :param subscription_id: Subscription used to build the DCE resource ID.
+    :param cluster_resource_group_name: Resource group used to build the DCE resource ID.
+    :param dcr_config: Parsed DCR payload (expected to be a dict).
+    :return: True if there was nothing to delete, the DCE was deleted, or the
+        error mentioned ResourceNotFound; False if all three attempts failed.
+    """
+    # A missing or malformed DCR payload means there is no DCE to clean up.
+    properties = dcr_config.get("properties") if isinstance(dcr_config, dict) else None
+    dce_id = properties.get("dataCollectionEndpointId") if isinstance(properties, dict) else None
+    if not isinstance(dce_id, str):
+        return True
+
+    # str.split() never returns an empty list, so validate the name itself: a
+    # trailing '/' would otherwise give an empty name and a DELETE against the
+    # dataCollectionEndpoints collection URL.
+    dce_name = dce_id.rstrip('/').rsplit('/', 1)[-1]
+    if not dce_name:
+        return True
+
+    dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{dce_name}"
+    dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dce_resource_id}?api-version=2022-06-01"
+    max_attempts = 3
+    for attempt in range(1, max_attempts + 1):
+        try:
+            send_raw_request(cmd.cli_ctx, "DELETE", dce_url)
+        except CLIError as e:
+            # An endpoint that is already gone counts as success.
+            if "ResourceNotFound" in str(e):
+                return True
+            if attempt == max_attempts:
+                logger.warning("Failed to delete DCE: %s - %s", dce_name, str(e))
+                return False
+            logger.info("Retrying DCE deletion after error: %s", str(e))
+        else:
+            logger.info("Successfully deleted DCE: %s", dce_name)
+            return True
+    return True  # not reached; keeps the return type consistent
+
 def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url): tags = {} _MAX_RETRY_TIMES = 3 From dd2e3d782cb81dbc8b39aeddbb8403cb71f94fd2 Mon Sep 17 00:00:00 2001 From: longwan Date: Tue, 15 Jul 2025 06:39:16 +0000 Subject: [PATCH 11/11] update tests --- testing/pipeline/k8s-custom-pipelines.yml | 4 + .../public/AzureMonitorHighScale.Tests.ps1 | 126 ++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 testing/test/extensions/public/AzureMonitorHighScale.Tests.ps1 diff --git a/testing/pipeline/k8s-custom-pipelines.yml b/testing/pipeline/k8s-custom-pipelines.yml index 0cc9895583a..8fe39c8728d 100644 --- a/testing/pipeline/k8s-custom-pipelines.yml +++ b/testing/pipeline/k8s-custom-pipelines.yml @@ -35,6 +35,10 @@ stages: parameters: jobName: AzureMonitor path: ./test/extensions/public/AzureMonitor.Tests.ps1 + - template: ./templates/run-test.yml + parameters: + jobName: AzureMonitorHighScale + path: ./test/extensions/public/AzureMonitorHighScale.Tests.ps1 - template: ./templates/run-test.yml parameters: jobName: AzurePolicy diff --git
a/testing/test/extensions/public/AzureMonitorHighScale.Tests.ps1 b/testing/test/extensions/public/AzureMonitorHighScale.Tests.ps1
new file mode 100644
index 00000000000..183bfb6b476
--- /dev/null
+++ b/testing/test/extensions/public/AzureMonitorHighScale.Tests.ps1
@@ -0,0 +1,126 @@
+Describe 'Azure Monitor High Scale Mode Testing' {
+    BeforeAll {
+        $extensionType = "microsoft.azuremonitor.containers"
+        $extensionName = "azuremonitor-containers"
+        $extensionAgentName = "omsagent"
+        $extensionAgentNamespace = "kube-system"
+
+        . $PSScriptRoot/../../helper/Constants.ps1
+        . $PSScriptRoot/../../helper/Helper.ps1
+    }
+
+    It 'Creates the extension with high log scale mode and verifies DCE creation' {
+        # Create the extension with high log scale mode enabled; --no-wait returns before the install completes
+        az $Env:K8sExtensionName create -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) `
+            --cluster-type connectedClusters --extension-type $extensionType -n $extensionName `
+            --configuration-settings "amalogs.enableHighLogScaleMode=true" --no-wait
+        $? | Should -BeTrue
+
+        # Verify the extension resource exists and its JSON can be parsed
+        $output = az $Env:K8sExtensionName show -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName
+        $? | Should -BeTrue
+
+        $extension = ($output | ConvertFrom-Json)
+        $extension | Should -Not -BeNullOrEmpty
+
+        # Verify high log scale mode and AAD auth are recorded in the configuration settings
+        $settings = $extension.configurationSettings
+        $settings.'amalogs.enableHighLogScaleMode' | Should -Be "true"
+        $settings.'amalogs.useAADAuth' | Should -Be "true"
+
+        # Poll every 10 seconds until Has-ExtensionData succeeds or the retry budget is exhausted
+        $n = 0
+        do {
+            if (Has-ExtensionData $extensionName) {
+                break
+            }
+            Start-Sleep -Seconds 10
+            $n += 1
+        } while ($n -le $MAX_RETRY_ATTEMPTS)
+        $n | Should -BeLessOrEqual $MAX_RETRY_ATTEMPTS
+
+        # Verify DCE creation; the expected name is MSCI-ingest-<location>-<cluster>, capped at 43 characters
+        $clusterLocation = (az resource show -g $($ENVCONFIG.resourceGroup) -n $($ENVCONFIG.arcClusterName) --resource-type "Microsoft.Kubernetes/connectedClusters" --query location -o tsv).ToLower()
+        $dceName = "MSCI-ingest-$clusterLocation-$($ENVCONFIG.arcClusterName)"
+        if ($dceName.Length -gt 43) {
+            $dceName = $dceName.Substring(0, 43)
+            # Remove trailing hyphen if present
+            if ($dceName.EndsWith("-")) {
+                $dceName = $dceName.Substring(0, $dceName.Length - 1)
+            }
+        }
+
+        $dce = az monitor data-collection endpoint show -g $($ENVCONFIG.resourceGroup) -n $dceName
+        $? | Should -BeTrue
+        $dce | Should -Not -BeNullOrEmpty
+
+        # Verify DCE configuration
+        $dceObj = ($dce | ConvertFrom-Json)
+        $dceObj.kind | Should -Be "Linux"
+        $dceObj.properties.networkAcls.publicNetworkAccess | Should -Be "Enabled"
+    }
+
+    It "Verifies Data Collection Rule configuration for high scale mode" {
+        # Expected DCR name is MSCI-<location>-<cluster>, capped at 64 characters
+        $clusterLocation = (az resource show -g $($ENVCONFIG.resourceGroup) -n $($ENVCONFIG.arcClusterName) --resource-type "Microsoft.Kubernetes/connectedClusters" --query location -o tsv).ToLower()
+        $dcrName = "MSCI-$clusterLocation-$($ENVCONFIG.arcClusterName)"
+        if ($dcrName.Length -gt 64) {
+            $dcrName = $dcrName.Substring(0, 64)
+        }
+
+        # Get the DCR
+        $dcr = az monitor data-collection rule show -g $($ENVCONFIG.resourceGroup) -n $dcrName
+        $? | Should -BeTrue
+        $dcr | Should -Not -BeNullOrEmpty
+
+        # Verify high scale mode streams configuration
+        $dcrObj = ($dcr | ConvertFrom-Json)
+        $streams = $dcrObj.properties.dataSources.extensions[0].streams
+        $streams | Should -Contain "Microsoft-ContainerLogV2-HighScale"
+        $streams | Should -Contain "Microsoft-KubeEvents"
+        $streams | Should -Contain "Microsoft-KubePodInventory"
+        $streams | Should -Contain "Microsoft-KubeNodeInventory"
+        $streams | Should -Contain "Microsoft-KubePVInventory"
+        $streams | Should -Contain "Microsoft-KubeServices"
+        $streams | Should -Contain "Microsoft-KubeMonAgentEvents"
+        $streams | Should -Contain "Microsoft-InsightsMetrics"
+        $streams | Should -Contain "Microsoft-ContainerInventory"
+        $streams | Should -Contain "Microsoft-ContainerNodeInventory"
+        $streams | Should -Contain "Microsoft-Perf"
+    }
+
+    It "Deletes the extension and verifies DCE cleanup" {
+        # Compute the expected DCE name (same rule as in the creation test) before deleting
+        $clusterLocation = (az resource show -g $($ENVCONFIG.resourceGroup) -n $($ENVCONFIG.arcClusterName) --resource-type "Microsoft.Kubernetes/connectedClusters" --query location -o tsv).ToLower()
+        $dceName = "MSCI-ingest-$clusterLocation-$($ENVCONFIG.arcClusterName)"
+        if ($dceName.Length -gt 43) {
+            $dceName = $dceName.Substring(0, 43)
+            # Remove trailing hyphen if present
+            if ($dceName.EndsWith("-")) {
+                $dceName = $dceName.Substring(0, $dceName.Length - 1)
+            }
+        }
+
+        # Delete the extension
+        $output = az $Env:K8sExtensionName delete -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --force
+        $? | Should -BeTrue
+
+        # Verify extension is deleted
+        $output = az $Env:K8sExtensionName show -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName
+        $? | Should -BeFalse
+        $output | Should -BeNullOrEmpty
+
+        # Verify the DCE was removed together with the extension
+        $dce = az monitor data-collection endpoint show -g $($ENVCONFIG.resourceGroup) -n $dceName
+        $? | Should -BeFalse
+    }
+
+    It "Performs another list after the delete" {
+        $output = az $Env:K8sExtensionName list -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters
+        $? | Should -BeTrue
+        $output | Should -Not -BeNullOrEmpty
+        # Match on the extension's name: its extensionType never equals $extensionName, so filtering on it always passes
+        $extensionExists = $output | ConvertFrom-Json | Where-Object { $_.name -eq $extensionName }
+        $extensionExists | Should -BeNullOrEmpty
+    }
+}