1+ """
2+ Rule: gcp.compute.disk.unattached
3+
4+ (spec — docs/specs/gcp/disk_unattached.md)
5+
6+ Intent:
7+ Detect Compute Engine persistent disks that are currently unattached to
8+ any VM and still bill for storage so they can be reviewed as conservative
9+ cleanup candidates.
10+
11+ Exclusions:
12+ - disk record malformed or name absent/empty (spec 8.1)
13+ - aggregated scope key unsupported or unresolvable (spec 8.2)
14+ - disk status not exactly "READY" (spec 8.4)
15+ - disk users field unresolvable / not an explicit empty list (spec 8.5)
16+ - disk users non-empty (spec 8.6)
17+
18+ Detection:
19+ - disk status == "READY"
20+ - users[] is explicitly an empty list (no current VM attachment)
21+ - covers zonal (zones/ZONE) and regional (regions/REGION) scopes
22+
23+ Confidence (spec 9.4):
24+ - Zonal, detached < 24h: LOW
25+ - Zonal, detached 24h–7d: MEDIUM
26+ - Zonal, detached >= 7d (or never): HIGH
27+ - Regional, detached < 24h: LOW
28+ - Regional, otherwise: MEDIUM
29+
30+ Cost model (spec 9.5):
31+ - estimated_monthly_cost_usd = None
32+ - No flat region-reference price table; pricing varies by type, region, and currency.
33+ - State only that unattached disks continue to incur storage charges.
34+
35+ APIs:
36+ - compute.disks.list (via disks.aggregatedList)
37+ """
38+
39+ import warnings
140from datetime import datetime , timezone
241from typing import List , Optional
342
948from cleancloud .core .finding import Finding
1049from cleancloud .core .risk import RiskLevel
1150
12- # GCP Persistent Disk pricing ($/GB/month, us-central1 reference).
13- # Source: https://cloud.google.com/compute/disks-image-pricing
14- #
15- # Notes:
16- # - pd-extreme also bills for provisioned IOPS separately (not estimable from listing).
17- # - Hyperdisk types bill for capacity + provisioned IOPS and/or throughput separately.
18- # Only the capacity component can be estimated here; actual cost is typically higher.
19- # hyperdisk-balanced: 3,000 IOPS and 140 MiB/s free baseline, additional usage billed.
20- # hyperdisk-extreme: all provisioned IOPS billable, no free baseline.
21- # hyperdisk-throughput: all provisioned throughput billable.
22- # Using pd-standard rate as conservative capacity-only floor for all hyperdisk types.
23- _DISK_TYPE_COST_PER_GB : dict = {
24- "pd-standard" : 0.04 ,
25- "pd-balanced" : 0.10 ,
26- "pd-ssd" : 0.17 ,
27- "pd-extreme" : 0.125 , # capacity only; provisioned IOPS billed separately
28- "hyperdisk-balanced" : 0.04 , # capacity only; IOPS + throughput billed separately
29- "hyperdisk-extreme" : 0.04 , # capacity only; all IOPS billed separately
30- "hyperdisk-throughput" : 0.04 , # capacity only; throughput billed separately
31- }
32- _DEFAULT_COST_PER_GB = 0.04 # pd-standard as conservative fallback
33-
3451_HYPERDISK_TYPES = frozenset ({"hyperdisk-balanced" , "hyperdisk-extreme" , "hyperdisk-throughput" })
3552
3653
@@ -46,17 +63,6 @@ def find_unattached_disks(
4663 Persistent disks bill regardless of attachment status. Orphaned disks are
4764 commonly left behind after VM deletion — a high-volume, zero-utility cost source.
4865
49- Detection logic:
50- - Disk status == READY (exists, not being created or deleted)
51- - Disk users list is empty (not attached to any instance)
52- - Covers both zonal disks (zones/ZONE) and regional disks (regions/REGION)
53-
54- Confidence:
55- - Zonal disk, unattached, detached > 7 days ago (or never detached): HIGH
56- - Zonal disk, detached 24h–7d ago: MEDIUM — may still be in a deletion pipeline
57- - Either type detached < 24h ago: LOW — very likely mid-pipeline
58- - Regional disk, unattached: MEDIUM — may be intentionally kept for HA failover
59-
6066 IAM permissions required:
6167 - compute.disks.list (included in roles/compute.viewer)
6268 """
@@ -71,17 +77,39 @@ def find_unattached_disks(
7177 # See: https://cloud.google.com/compute/docs/reference/rest/v1/disks/aggregatedList
7278 try :
7379 for scope_key , scope_disks in disks_client .aggregated_list (project = project_id ):
80+ # spec 9.6 / 9.1.8-9: surface partial-success warnings so callers know
81+ # that zero findings cannot be interpreted as a clean project.
82+ _scope_warning = getattr (scope_disks , "warning" , None )
83+ if _scope_warning and getattr (_scope_warning , "code" , "" ):
84+ warnings .warn (
85+ f"gcp.compute.disk.unattached: aggregated inventory returned partial "
86+ f"coverage for scope '{ scope_key } ' "
87+ f"(code: { _scope_warning .code } ) — findings from this scope may be incomplete" ,
88+ UserWarning ,
89+ stacklevel = 2 ,
90+ )
91+
7492 if not scope_disks .disks :
7593 continue
7694
77- # scope_key is "zones/us-central1-a " or "regions/us-central1"
95+ # --- spec 8.2: scope key must be exactly "zones/ZONE " or "regions/REGION" ---
7896 scope_parts = scope_key .split ("/" )
97+ if len (scope_parts ) != 2 :
98+ continue # too few or too many segments — skip
99+
79100 scope_type = scope_parts [0 ] # "zones" or "regions"
80101 location = scope_parts [1 ] # zone name or region name
81102
82103 if scope_type == "zones" :
83104 zone_name = location
84- region = zone_name .rsplit ("-" , 1 )[0 ] # "us-central1-a" -> "us-central1"
105+ # spec 7 / 9.1.5: derive region by stripping the trailing zone letter.
106+ # GCP zone names always end in a single alphabetic letter (e.g. "-a").
107+ # A scope like zones/us-central1 (no zone letter) must skip — rsplit
108+ # alone would silently derive "us" from "us-central1", which is wrong.
109+ zone_parts = zone_name .rsplit ("-" , 1 )
110+ if len (zone_parts ) < 2 or len (zone_parts [1 ]) != 1 or not zone_parts [1 ].isalpha ():
111+ continue # spec 8.2 / 7: zone lacks a single-letter suffix → skip
112+ region = zone_parts [0 ]
85113 is_regional = False
86114 elif scope_type == "regions" :
87115 zone_name = None
@@ -94,46 +122,55 @@ def find_unattached_disks(
94122 continue
95123
96124 for disk in scope_disks .disks :
125+ # spec 8.1: skip malformed records with absent / empty name
126+ if not disk .name :
127+ continue
128+
129+ # spec 8.4: only READY disks are eligible
97130 if disk .status != "READY" :
98131 continue
99- if disk .users : # non-empty = attached to one or more VMs
132+
133+ # spec 8.5: users must be an explicit list — any other type
134+ # (None, str, dict, tuple, …) means attachment state is unresolvable.
135+ # An empty list is the only value that safely means "unattached".
136+ if not isinstance (disk .users , list ):
137+ continue
138+ # spec 8.6: any current user entry means attached → skip
139+ if disk .users :
100140 continue
101141
102- # Extract short disk type from full resource URL
142+ # Extract short disk type from full resource URL.
103143 # e.g. "zones/us-central1-a/diskTypes/pd-ssd" -> "pd-ssd"
144+ # spec 7: fallback to "unknown" (not a guessed default type)
104145 disk_type_url = disk .type_ or ""
105- disk_type = disk_type_url .split ("/" )[- 1 ] if disk_type_url else "pd-standard "
146+ disk_type = disk_type_url .split ("/" )[- 1 ] if disk_type_url else "unknown "
106147
107- size_gb = int (disk .size_gb ) if disk .size_gb is not None else 0
108- cost_per_gb = _DISK_TYPE_COST_PER_GB .get (disk_type , _DEFAULT_COST_PER_GB )
109- monthly_cost = round (size_gb * cost_per_gb , 2 )
148+ # spec 7: parse size as non-negative int; use 0 on malformed values
149+ try :
150+ size_gb = int (disk .size_gb ) if disk .size_gb is not None else 0
151+ except (ValueError , TypeError ):
152+ size_gb = 0
110153
111154 labels = dict (disk .labels ) if disk .labels else {}
112155
113156 # Regional disks use a different resource path than zonal disks.
114- # lastAttachTimestamp / lastDetachTimestamp are [Output Only] fields
115- # confirmed in GCP Disk API:
116- # https://cloud.google.com/compute/docs/reference/rest/v1/disks
117157 if is_regional :
118158 resource_id = f"projects/{ project_id } /regions/{ region } /disks/{ disk .name } "
119159 report_location = region
120160 else :
121161 resource_id = f"projects/{ project_id } /zones/{ zone_name } /disks/{ disk .name } "
122162 report_location = zone_name
123163
124- # Confidence: regional disks are often intentionally provisioned for HA
125- # (replicated across two zones); an unattached regional disk is more
126- # ambiguous than an unattached zonal disk.
164+ # spec 9.4: confidence baseline
127165 confidence = ConfidenceLevel .MEDIUM if is_regional else ConfidenceLevel .HIGH
128166
129167 # Modulate confidence by time since last detach.
130- # A disk detached < 24h ago may be mid-pipeline (VM deleted, disk
131- # deletion pending). After 7 days the disk is almost certainly orphaned.
132- last_detach_str = disk . last_detach_timestamp or ""
168+ # spec 7: treat non-string timestamps as unknown rather than crashing.
169+ raw_ts = disk . last_detach_timestamp
170+ last_detach_str = raw_ts if isinstance ( raw_ts , str ) else ""
133171 hours_since_detach : Optional [float ] = None
134172 if last_detach_str :
135173 try :
136- # GCP uses RFC3339; handle both "+HH:MM" and "Z" offsets
137174 ts = last_detach_str .replace ("Z" , "+00:00" )
138175 last_detach = datetime .fromisoformat (ts )
139176 if last_detach .tzinfo is None :
@@ -142,17 +179,17 @@ def find_unattached_disks(
142179 if hours_since_detach < 24 :
143180 confidence = ConfidenceLevel .LOW
144181 elif hours_since_detach < 7 * 24 and not is_regional :
145- # Zonal disk detached 24h–7d ago: still plausibly in a pipeline.
146- # Regional disks stay at their MEDIUM base regardless .
182+ # Zonal disk detached 24h–7d: plausibly still mid- pipeline.
183+ # Regional disks remain at MEDIUM baseline .
147184 confidence = ConfidenceLevel .MEDIUM
148- except ValueError :
149- pass
185+ except ( ValueError , AttributeError ) :
186+ pass # unparseable timestamp — keep baseline confidence
150187
151188 signals_used = [
152189 "Disk status: READY" ,
153190 "No VM users (users list empty)" ,
154- f"Disk type: { disk_type } (~$ { cost_per_gb } /GB/month storage) " ,
155- f"Size: { size_gb } GB -> ~$ { monthly_cost } /month (estimated, region-dependent) " ,
191+ f"Disk type: { disk_type } " ,
192+ f"Size: { size_gb } GB" ,
156193 ]
157194 if is_regional :
158195 signals_used .append (
@@ -161,15 +198,16 @@ def find_unattached_disks(
161198 if hours_since_detach is not None :
162199 signals_used .append (f"Last detached: { hours_since_detach :.0f} h ago" )
163200
201+ # spec 9.5 / 10.2: never claim a specific dollar cost — pricing varies
164202 signals_not_checked = [
203+ "Exact monthly cost (varies by disk type, region, currency, and provisioned performance — see GCP billing)" ,
165204 "Disk reserved for imminent VM recreation" ,
166- "Snapshot-only workflow (intentional detachment )" ,
205+ "Snapshot, image, or template restore dependency (disk may be intentionally detached )" ,
167206 "Cross-project disk sharing" ,
168207 ]
169208 if disk_type in _HYPERDISK_TYPES :
170209 signals_not_checked .append (
171- f"Hyperdisk IOPS and throughput charges are billed separately from "
172- f"capacity — actual monthly cost is likely higher than ~${ monthly_cost } "
210+ "Hyperdisk IOPS and throughput charges are billed separately from capacity cost"
173211 )
174212 if disk_type == "pd-extreme" :
175213 signals_not_checked .append (
@@ -201,8 +239,7 @@ def find_unattached_disks(
201239 summary = (
202240 f"Persistent disk '{ disk .name } ' ({ size_gb } GB, { disk_type } ) "
203241 f"in { 'region' if is_regional else 'zone' } '{ report_location } ' "
204- f"is not attached to any VM but continues to incur storage "
205- f"charges (~${ monthly_cost } /month, estimated, region-dependent)."
242+ f"is not attached to any VM but continues to incur storage charges."
206243 ),
207244 reason = "Disk has no attached VM (users list is empty)" ,
208245 risk = RiskLevel .MEDIUM ,
@@ -214,7 +251,8 @@ def find_unattached_disks(
214251 time_window = None ,
215252 ),
216253 details = details ,
217- estimated_monthly_cost_usd = (monthly_cost if monthly_cost > 0 else None ),
254+ # spec 9.5.1: cost model is None — pricing varies by type/region/currency
255+ estimated_monthly_cost_usd = None ,
218256 )
219257 )
220258
0 commit comments