Skip to content

Commit 0b3315f

Browse files
authored
Azure rules hardening - part 3 (#165)
* azure.compute.disk.unattached * azure.resource.untagged * azure.vm.stopped_not_deallocated * azure.virtual_network_gateway.idle
1 parent 82a4e62 commit 0b3315f

15 files changed

Lines changed: 7057 additions & 828 deletions

cleancloud/providers/azure/rules/unattached_managed_disks.py

Lines changed: 483 additions & 58 deletions
Large diffs are not rendered by default.

cleancloud/providers/azure/rules/untagged_resources.py

Lines changed: 440 additions & 58 deletions
Large diffs are not rendered by default.

cleancloud/providers/azure/rules/vm_stopped_not_deallocated.py

Lines changed: 189 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,125 @@
1+
"""
2+
Rule: azure.vm.stopped_not_deallocated
3+
4+
Intent:
5+
Detect Azure virtual machines in the billed stopped-allocated state
6+
(PowerState/stopped) that continue to incur compute charges even though
7+
they appear off.
8+
9+
This is a review-candidate rule only. It does not prove the stop was
10+
accidental, that the VM should be deleted, or that a specific monthly
11+
saving exists.
12+
13+
Exclusions:
14+
- id absent or empty
15+
- name absent or empty
16+
- location differs from the optional region filter (both sides lowercased, then compared for exact equality)
17+
- provisioning state does not resolve to exactly "Succeeded"
18+
- per-VM instance_view retrieval fails (skip that VM)
19+
- runtime power state unresolvable, absent, or not exactly PowerState/stopped
20+
21+
Detection:
22+
- provisioning state is "Succeeded" (SDK-first with nested fallback)
23+
- runtime power state resolves to exactly "PowerState/stopped" from
24+
instance_view statuses (one unambiguous code)
25+
26+
Cost model (spec 10):
27+
estimated_monthly_cost_usd = None (always)
28+
The rule may state that compute charges continue, but must not emit a
29+
numeric estimate without a SKU-aware pricing model.
30+
31+
APIs:
32+
- Microsoft.Compute/virtualMachines/read (virtual_machines.list_all)
33+
- Microsoft.Compute/virtualMachines/instanceView/read
34+
(virtual_machines.instance_view per VM)
35+
"""
36+
137
from datetime import datetime, timezone
238
from typing import List, Optional
339

40+
from azure.core.exceptions import HttpResponseError, ServiceRequestError, ServiceResponseError
441
from azure.mgmt.compute import ComputeManagementClient
542

643
from cleancloud.core.confidence import ConfidenceLevel
744
from cleancloud.core.evidence import Evidence
845
from cleancloud.core.finding import Finding
946
from cleancloud.core.risk import RiskLevel
1047

48+
_RULE_ID = "azure.vm.stopped_not_deallocated"
49+
_RESOURCE_TYPE = "azure.virtual_machine"
50+
_TARGET_POWER_STATE = "PowerState/stopped"
51+
52+
53+
def _norm_location(s: str) -> str:
    """Return *s* lowercased, or an empty string when *s* is falsy.

    Only letter case is normalised (nothing is trimmed or otherwise
    rewritten), so comparing two normalised values is an exact lowercase
    match as required by spec 7.
    """
    if not s:
        return ""
    return s.lower()
56+
1157

1258
def _extract_resource_group(vm_id: str) -> str:
    """Extract the resource group name from a VM ARM resource ID.

    The ID is split on "/" and the segment that follows the first
    "resourceGroups" segment (matched case-insensitively) is returned
    unchanged.

    Args:
        vm_id: ARM resource ID, e.g.
            "/subscriptions/<sub>/resourceGroups/<rg>/providers/...".

    Returns:
        The resource group name exactly as it appears in the ID.

    Raises:
        ValueError: if the ID has no "resourceGroups" segment, or if the
            segment following it is missing or empty.
    """
    segments = vm_id.split("/")
    # Pair every segment with its successor so a trailing "resourceGroups"
    # (which has no successor) is never matched.
    for key, value in zip(segments, segments[1:]):
        if key.lower() == "resourcegroups":
            if value:
                return value
            # An empty name (".../resourceGroups//...") is a malformed ID;
            # returning "" would only fail later in the per-VM API call.
            break
    raise ValueError(f"Cannot extract resource group from VM ID: {vm_id}")
1965

2066

21-
def _get_power_state(instance_view) -> Optional[str]:
22-
"""Extract power state from instance view statuses."""
23-
for status in instance_view.statuses or []:
24-
if status.code and status.code.startswith("PowerState/"):
25-
return status.code
26-
return None
67+
# ---------------------------------------------------------------------------
68+
# Resolvers
69+
# ---------------------------------------------------------------------------
70+
71+
72+
def _resolve_provisioning_state(vm) -> Optional[str]:
    """
    Resolve the VM's provisioning state from its model payload (spec 9.1).

    Two sources are consulted:
      1. the top-level SDK attribute ``vm.provisioning_state``
      2. the nested properties object: ``vm.properties.provisioning_state``,
         then ``vm.properties.provisioningState`` if the former is None

    If both sources carry a value and the values differ, the state is
    treated as conflicting and None is returned. Otherwise the top-level
    value is preferred when truthy, falling back to the nested one.

    The caller treats anything other than "Succeeded" as a skip.
    """
    top_level = getattr(vm, "provisioning_state", None)

    nested = None
    raw_props = getattr(vm, "properties", None)
    if raw_props is not None:
        # Try the snake_case projection first, then the raw camelCase key.
        for attr_name in ("provisioning_state", "provisioningState"):
            nested = getattr(raw_props, attr_name, None)
            if nested is not None:
                break

    both_present = top_level is not None and nested is not None
    if both_present and top_level != nested:
        return None  # sources disagree -> skip

    return top_level or nested
96+
97+
98+
def _resolve_power_state(instance_view) -> Optional[str]:
    """
    Derive the runtime power state from instance view statuses (spec 9.2).

    Every status whose ``code`` is truthy and starts with "PowerState/" is
    considered:
      - none found               -> None (unknown, caller skips)
      - one distinct code        -> that code (duplicates are tolerated)
      - several distinct codes   -> None (ambiguous, caller skips)

    The caller emits a finding only when the result is exactly
    "PowerState/stopped".
    """
    distinct_codes = set()
    for status in getattr(instance_view, "statuses", None) or []:
        code = getattr(status, "code", None)
        if code and code.startswith("PowerState/"):
            distinct_codes.add(code)

    # Zero codes means unknown; more than one means conflicting.
    if len(distinct_codes) != 1:
        return None

    return next(iter(distinct_codes))
27123

28124

29125
def find_stopped_not_deallocated_vms(
@@ -34,14 +130,14 @@ def find_stopped_not_deallocated_vms(
34130
client: Optional[ComputeManagementClient] = None,
35131
) -> List[Finding]:
36132
"""
37-
Find Azure VMs that are stopped but not deallocated.
133+
Find Azure VMs that are stopped but not deallocated (PowerState/stopped).
38134
39-
VMs in 'Stopped' state (OS-level shutdown) still incur full compute charges.
40-
Only 'Deallocated' VMs stop incurring compute costs. This is a common Azure
41-
cost trap where users think their VM is off but are paying full price.
135+
VMs in the stopped-allocated state still incur full compute charges.
136+
Only deallocated VMs stop incurring compute costs.
42137
43138
IAM permissions:
44139
- Microsoft.Compute/virtualMachines/read
140+
- Microsoft.Compute/virtualMachines/instanceView/read
45141
"""
46142
findings: List[Finding] = []
47143

@@ -50,70 +146,111 @@ def find_stopped_not_deallocated_vms(
50146
subscription_id=subscription_id,
51147
)
52148

149+
now = datetime.now(timezone.utc)
150+
53151
for vm in compute_client.virtual_machines.list_all():
54-
if region_filter and (vm.location or "").lower() != region_filter.lower():
152+
# spec 8.1: id must be present and non-empty
153+
vm_id = getattr(vm, "id", None)
154+
if not vm_id:
155+
continue
156+
157+
# spec 8.2: name must be present and non-empty
158+
vm_name = getattr(vm, "name", None)
159+
if not vm_name:
160+
continue
161+
162+
# spec 8.3: region filter -- exact lowercase match
163+
location = _norm_location(getattr(vm, "location", "") or "")
164+
if region_filter and location != _norm_location(region_filter):
55165
continue
56166

167+
# spec 8.4 / 9.1: provisioning state must resolve to exactly "Succeeded"
168+
# Avoids transient Stopped observations during VM creation / start
169+
if _resolve_provisioning_state(vm) != "Succeeded":
170+
continue
171+
172+
# spec 8.5 / 9.2: per-VM instance_view to get authoritative runtime power state.
173+
# Expected per-VM retrieval failures -> skip this VM (spec 12).
174+
# ValueError: malformed ARM id from _extract_resource_group.
175+
# HttpResponseError: HTTP-level failure (404, 403, 429, 5xx).
176+
# ServiceRequestError: transport failure before a response (connection reset,
177+
# DNS, network timeout).
178+
# ServiceResponseError: transport failure while reading the response
179+
# (incomplete read, stream closed).
180+
# SerializationError, DeserializationError, and all other exceptions propagate.
57181
try:
58-
resource_group = _extract_resource_group(vm.id)
182+
resource_group = _extract_resource_group(vm_id)
59183
instance_view = compute_client.virtual_machines.instance_view(
60184
resource_group_name=resource_group,
61-
vm_name=vm.name,
185+
vm_name=vm_name,
62186
)
63-
except Exception:
187+
except (ValueError, HttpResponseError, ServiceRequestError, ServiceResponseError):
64188
continue
65189

66-
power_state = _get_power_state(instance_view)
190+
# spec 8.5 / 9.2: resolve power state; None = unknown -> skip
191+
power_state = _resolve_power_state(instance_view)
192+
if power_state is None:
193+
continue
67194

68-
if power_state != "PowerState/stopped":
195+
# spec 8.6: emit only for exact PowerState/stopped
196+
if power_state != _TARGET_POWER_STATE:
69197
continue
70198

71-
evidence = Evidence(
72-
signals_used=[
73-
"VM power state is 'Stopped' (not 'Deallocated')",
74-
"Stopped VMs incur full compute charges",
75-
],
76-
signals_not_checked=[
77-
"Whether stop was intentional or accidental",
78-
"Planned future usage",
79-
"IaC-managed intent",
80-
],
81-
time_window=None,
82-
)
199+
# --- Context fields (best-effort; never gate emission) ---
200+
hw = getattr(vm, "hardware_profile", None)
201+
vm_size = getattr(hw, "vm_size", None) if hw else None
202+
203+
sp = getattr(vm, "storage_profile", None)
204+
os_disk = getattr(sp, "os_disk", None) if sp else None
205+
os_type = getattr(os_disk, "os_type", None) if os_disk else None
83206

207+
tags = getattr(vm, "tags", None) or {} # spec 7: never None in output
208+
209+
# --- EMIT ---
84210
findings.append(
85211
Finding(
86212
provider="azure",
87-
rule_id="azure.vm.stopped_not_deallocated",
88-
resource_type="azure.virtual_machine",
89-
resource_id=vm.id,
90-
region=vm.location,
213+
rule_id=_RULE_ID,
214+
resource_type=_RESOURCE_TYPE,
215+
resource_id=vm_id,
216+
region=location,
217+
estimated_monthly_cost_usd=None, # spec 10: always None
91218
title="Azure VM Stopped but Not Deallocated",
92219
summary=(
93-
f"VM '{vm.name}' is stopped but not deallocated — "
94-
"still incurring full compute charges"
220+
f"VM '{vm_name}' is stopped but not deallocated — " "compute charges continue"
221+
),
222+
reason=(
223+
"Runtime power state is 'PowerState/stopped'; "
224+
"stopped-allocated VMs continue to incur compute charges"
95225
),
96-
reason="VM power state is 'Stopped' (not 'Deallocated')",
97226
risk=RiskLevel.HIGH,
98227
confidence=ConfidenceLevel.HIGH,
99-
detected_at=datetime.now(timezone.utc),
100-
evidence=evidence,
228+
detected_at=now,
229+
evidence=Evidence(
230+
signals_used=[
231+
"Provisioning state is 'Succeeded'",
232+
f"Runtime power state is exact '{_TARGET_POWER_STATE}' "
233+
"from instance_view statuses",
234+
"Stopped-allocated VMs continue to incur compute charges; "
235+
"only 'Deallocated' stops compute billing",
236+
],
237+
signals_not_checked=[
238+
"Whether the stop was intentional or accidental",
239+
"Planned restart or future usage",
240+
"IaC-managed or schedule-managed intent",
241+
"Reservation, savings plan, or licensing context",
242+
"Attached disk, networking, or license costs",
243+
],
244+
time_window=None,
245+
),
101246
details={
102-
"vm_name": vm.name,
103-
"vm_size": (
104-
getattr(vm.hardware_profile, "vm_size", None)
105-
if vm.hardware_profile
106-
else None
107-
),
108-
"os_type": (
109-
getattr(vm.storage_profile.os_disk, "os_type", None)
110-
if vm.storage_profile and vm.storage_profile.os_disk
111-
else None
112-
),
113-
"location": vm.location,
114-
"power_state": power_state,
247+
"vm_name": vm_name,
115248
"subscription_id": subscription_id,
116-
"tags": vm.tags,
249+
"power_state": power_state,
250+
"provisioning_state": "Succeeded",
251+
"vm_size": vm_size,
252+
"os_type": os_type,
253+
"tags": tags,
117254
},
118255
)
119256
)

0 commit comments

Comments
 (0)