Skip to content

Commit 5349924

Browse files
authored
fix: simplify logic and enable correct recording rule groups for managed prom extension (#7)
1 parent cd89abe commit 5349924

4 files changed

Lines changed: 64 additions & 27 deletions

File tree

src/k8s-extension/HISTORY.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Release History
66
1.6.5
77
++++++++++++++++++
88
* microsoft.dataprotection.kubernetes: Add support for 'DisableInformerCache' configuration.
9+
* microsoft.azuremonitor.containers.metrics: Simplify logic and enable the correct recording rule groups for the managed Prometheus extension.
910

1011
1.6.4
1112
++++++++++++++++++

src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/create.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,19 @@ def get_recording_rules_template(cmd, azure_monitor_workspace_resource_id):
1515
url = f"{armendpoint}{azure_monitor_workspace_resource_id}/providers/microsoft.alertsManagement/alertRuleRecommendations?api-version={ALERTS_API}"
1616
r = send_raw_request(cmd.cli_ctx, "GET", url, headers=headers)
1717
data = json.loads(r.text)
18-
return data['value']
18+
19+
filtered_templates = [
20+
template for template in data.get('value', [])
21+
# pylint: disable=line-too-long
22+
if template.get("properties", {}).get("alertRuleType", "").lower() == "microsoft.alertsmanagement/prometheusrulegroups" and isinstance(template.get("properties", {}).get("rulesArmTemplate", {}).get("resources"), list) and all(
23+
isinstance(rule, dict) and "record" in rule and "expression" in rule
24+
for resource in template["properties"]["rulesArmTemplate"]["resources"]
25+
if resource.get("type", "").lower() == "microsoft.alertsmanagement/prometheusrulegroups"
26+
for rule in resource.get("properties", {}).get("rules", [])
27+
)
28+
]
29+
30+
return filtered_templates
1931

2032

2133
# pylint: disable=line-too-long
@@ -39,8 +51,7 @@ def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, a
3951
for _ in range(3):
4052
try:
4153
headers = ['User-Agent=arc-azuremonitormetrics.put_rules.' + default_rule_group_name]
42-
send_raw_request(cmd.cli_ctx, "PUT", url,
43-
body=body, headers=headers)
54+
send_raw_request(cmd.cli_ctx, "PUT", url, body=body, headers=headers)
4455
break
4556
except CLIError as e:
4657
error = e
@@ -51,28 +62,28 @@ def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, a
5162
# pylint: disable=line-too-long
5263
def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region):
5364
default_rules_template = get_recording_rules_template(cmd, azure_monitor_workspace_resource_id)
54-
default_rule_group_name = "NodeRecordingRulesRuleGroup-{0}".format(cluster_name)
55-
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
56-
cluster_subscription,
57-
cluster_resource_group_name,
58-
default_rule_group_name
59-
)
60-
url = "{0}{1}?api-version={2}".format(
61-
cmd.cli_ctx.cloud.endpoints.resource_manager,
62-
default_rule_group_id,
63-
RULES_API
64-
)
65-
put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, True, 0)
6665

67-
default_rule_group_name = "KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name)
68-
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
69-
cluster_subscription,
70-
cluster_resource_group_name,
71-
default_rule_group_name
72-
)
73-
url = "{0}{1}?api-version={2}".format(
74-
cmd.cli_ctx.cloud.endpoints.resource_manager,
75-
default_rule_group_id,
76-
RULES_API
77-
)
78-
put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, True, 1)
66+
for index, rule_template in enumerate(default_rules_template):
67+
rule_name = rule_template["name"]
68+
is_windows_rule = "win" in rule_name.lower()
69+
70+
# Skip Windows recording rules, as the ARC metrics extension doesn't have Windows support
71+
if is_windows_rule:
72+
continue
73+
74+
rule_group_name = f"{rule_template['name']}-{cluster_name}"
75+
rule_group_id = f"/subscriptions/{cluster_subscription}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{rule_group_name}"
76+
url = f"{cmd.cli_ctx.cloud.endpoints.resource_manager}{rule_group_id}?api-version={RULES_API}"
77+
78+
put_rules(
79+
cmd,
80+
rule_group_id,
81+
rule_group_name,
82+
mac_region,
83+
azure_monitor_workspace_resource_id,
84+
cluster_name,
85+
default_rules_template,
86+
url,
87+
True,
88+
index
89+
)

src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/delete.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,10 @@ def delete_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster
3535
cluster_resource_group_name,
3636
"KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name)
3737
)
38+
delete_rule(
39+
cmd,
40+
cluster_subscription,
41+
cluster_resource_group_name,
42+
"UXRecordingRulesRuleGroup - {0}".format(cluster_name)
43+
)
44+

testing/test/extensions/public/AzureMonitorMetrics.Tests.ps1

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Describe 'Azure Monitor Metrics Testing' {
55
$extensionName = "azuremonitor-metrics"
66
$extensionAgentName = "ama-metrics"
77
$extensionAgentNamespace = "kube-system"
8+
$workspaceResourceGroup = $null # Initialize here for shared scope
89

910
. $PSScriptRoot/../../helper/Constants.ps1
1011
. $PSScriptRoot/../../helper/Helper.ps1
@@ -47,6 +48,23 @@ Describe 'Azure Monitor Metrics Testing' {
4748
$extensionExists | Should -Not -BeNullOrEmpty
4849
}
4950

51+
It 'Verifies rule groups were created' {
52+
$clusterName = $ENVCONFIG.arcClusterName
53+
$expectedRuleGroupNames = @(
54+
"KubernetesRecordingRulesRuleGroup-$clusterName",
55+
"NodeRecordingRulesRuleGroup-$clusterName"
56+
)
57+
58+
$ruleGroups = az resource list --resource-group $($ENVCONFIG.resourceGroup) --resource-type "Microsoft.AlertsManagement/prometheusRuleGroups" --query "[].{name:name, location:location, id:id}" | ConvertFrom-Json
59+
$ruleGroups | Should -Not -BeNullOrEmpty -Because "Rule groups may take time to be created after extension onboarding"
60+
61+
foreach ($expectedName in $expectedRuleGroupNames) {
62+
$matchingGroup = $ruleGroups | Where-Object { $_.name -eq $expectedName }
63+
$matchingGroup | Should -Not -BeNullOrEmpty -Because "Rule group '$expectedName' should have been created by create.py"
64+
}
65+
}
66+
67+
5068
It "Deletes the extension from the cluster" {
5169
$output = az $Env:K8sExtensionName delete -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --force
5270
$? | Should -BeTrue

0 commit comments

Comments
 (0)