diff --git a/cleancloud/providers/gcp/rules/ai/__init__.py b/cleancloud/providers/gcp/rules/ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cleancloud/providers/gcp/rules/featurestore_idle.py b/cleancloud/providers/gcp/rules/ai/featurestore_idle.py similarity index 100% rename from cleancloud/providers/gcp/rules/featurestore_idle.py rename to cleancloud/providers/gcp/rules/ai/featurestore_idle.py diff --git a/cleancloud/providers/gcp/rules/tpu_idle.py b/cleancloud/providers/gcp/rules/ai/tpu_idle.py similarity index 100% rename from cleancloud/providers/gcp/rules/tpu_idle.py rename to cleancloud/providers/gcp/rules/ai/tpu_idle.py diff --git a/cleancloud/providers/gcp/rules/vertex_endpoint_idle.py b/cleancloud/providers/gcp/rules/ai/vertex_endpoint_idle.py similarity index 100% rename from cleancloud/providers/gcp/rules/vertex_endpoint_idle.py rename to cleancloud/providers/gcp/rules/ai/vertex_endpoint_idle.py diff --git a/cleancloud/providers/gcp/rules/vertex_training_job_long_running.py b/cleancloud/providers/gcp/rules/ai/vertex_training_job_long_running.py similarity index 100% rename from cleancloud/providers/gcp/rules/vertex_training_job_long_running.py rename to cleancloud/providers/gcp/rules/ai/vertex_training_job_long_running.py diff --git a/cleancloud/providers/gcp/rules/workbench_idle.py b/cleancloud/providers/gcp/rules/ai/workbench_idle.py similarity index 100% rename from cleancloud/providers/gcp/rules/workbench_idle.py rename to cleancloud/providers/gcp/rules/ai/workbench_idle.py diff --git a/cleancloud/providers/gcp/rules/disk_unattached.py b/cleancloud/providers/gcp/rules/disk_unattached.py index 01184ff..b2f3db4 100644 --- a/cleancloud/providers/gcp/rules/disk_unattached.py +++ b/cleancloud/providers/gcp/rules/disk_unattached.py @@ -1,3 +1,42 @@ +""" +Rule: gcp.compute.disk.unattached + + (spec — docs/specs/gcp/disk_unattached.md) + +Intent: + Detect Compute Engine persistent disks that are currently unattached to + any VM and 
still bill for storage so they can be reviewed as conservative + cleanup candidates. + +Exclusions: + - disk record malformed or name absent/empty (spec 8.1) + - aggregated scope key unsupported or unresolvable (spec 8.2) + - disk status not exactly "READY" (spec 8.4) + - disk users field unresolvable / not an explicit empty list (spec 8.5) + - disk users non-empty (spec 8.6) + +Detection: + - disk status == "READY" + - users[] is explicitly an empty list (no current VM attachment) + - covers zonal (zones/ZONE) and regional (regions/REGION) scopes + +Confidence (spec 9.4): + - Zonal, detached < 24h: LOW + - Zonal, detached 24h–7d: MEDIUM + - Zonal, detached >= 7d (or never): HIGH + - Regional, detached < 24h: LOW + - Regional, otherwise: MEDIUM + +Cost model (spec 9.5): + - estimated_monthly_cost_usd = None + - No flat region-reference price table; pricing varies by type, region, and currency. + - State only that unattached disks continue to incur storage charges. + +APIs: + - compute.disks.list (via disks.aggregatedList) +""" + +import warnings from datetime import datetime, timezone from typing import List, Optional @@ -9,28 +48,6 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -# GCP Persistent Disk pricing ($/GB/month, us-central1 reference). -# Source: https://cloud.google.com/compute/disks-image-pricing -# -# Notes: -# - pd-extreme also bills for provisioned IOPS separately (not estimable from listing). -# - Hyperdisk types bill for capacity + provisioned IOPS and/or throughput separately. -# Only the capacity component can be estimated here; actual cost is typically higher. -# hyperdisk-balanced: 3,000 IOPS and 140 MiB/s free baseline, additional usage billed. -# hyperdisk-extreme: all provisioned IOPS billable, no free baseline. -# hyperdisk-throughput: all provisioned throughput billable. -# Using pd-standard rate as conservative capacity-only floor for all hyperdisk types. 
-_DISK_TYPE_COST_PER_GB: dict = { - "pd-standard": 0.04, - "pd-balanced": 0.10, - "pd-ssd": 0.17, - "pd-extreme": 0.125, # capacity only; provisioned IOPS billed separately - "hyperdisk-balanced": 0.04, # capacity only; IOPS + throughput billed separately - "hyperdisk-extreme": 0.04, # capacity only; all IOPS billed separately - "hyperdisk-throughput": 0.04, # capacity only; throughput billed separately -} -_DEFAULT_COST_PER_GB = 0.04 # pd-standard as conservative fallback - _HYPERDISK_TYPES = frozenset({"hyperdisk-balanced", "hyperdisk-extreme", "hyperdisk-throughput"}) @@ -46,17 +63,6 @@ def find_unattached_disks( Persistent disks bill regardless of attachment status. Orphaned disks are commonly left behind after VM deletion — a high-volume, zero-utility cost source. - Detection logic: - - Disk status == READY (exists, not being created or deleted) - - Disk users list is empty (not attached to any instance) - - Covers both zonal disks (zones/ZONE) and regional disks (regions/REGION) - - Confidence: - - Zonal disk, unattached, detached > 7 days ago (or never detached): HIGH - - Zonal disk, detached 24h–7d ago: MEDIUM — may still be in a deletion pipeline - - Either type detached < 24h ago: LOW — very likely mid-pipeline - - Regional disk, unattached: MEDIUM — may be intentionally kept for HA failover - IAM permissions required: - compute.disks.list (included in roles/compute.viewer) """ @@ -71,17 +77,39 @@ def find_unattached_disks( # See: https://cloud.google.com/compute/docs/reference/rest/v1/disks/aggregatedList try: for scope_key, scope_disks in disks_client.aggregated_list(project=project_id): + # spec 9.6 / 9.1.8-9: surface partial-success warnings so callers know + # that zero findings cannot be interpreted as a clean project. 
+ _scope_warning = getattr(scope_disks, "warning", None) + if _scope_warning and getattr(_scope_warning, "code", ""): + warnings.warn( + f"gcp.compute.disk.unattached: aggregated inventory returned partial " + f"coverage for scope '{scope_key}' " + f"(code: {_scope_warning.code}) — findings from this scope may be incomplete", + UserWarning, + stacklevel=2, + ) + if not scope_disks.disks: continue - # scope_key is "zones/us-central1-a" or "regions/us-central1" + # --- spec 8.2: scope key must be exactly "zones/ZONE" or "regions/REGION" --- scope_parts = scope_key.split("/") + if len(scope_parts) != 2: + continue # too few or too many segments — skip + scope_type = scope_parts[0] # "zones" or "regions" location = scope_parts[1] # zone name or region name if scope_type == "zones": zone_name = location - region = zone_name.rsplit("-", 1)[0] # "us-central1-a" -> "us-central1" + # spec 7 / 9.1.5: derive region by stripping the trailing zone letter. + # GCP zone names always end in a single alphabetic letter (e.g. "-a"). + # A scope like zones/us-central1 (no zone letter) must skip — rsplit + # alone would silently derive "us" from "us-central1", which is wrong. + zone_parts = zone_name.rsplit("-", 1) + if len(zone_parts) < 2 or len(zone_parts[1]) != 1 or not zone_parts[1].isalpha(): + continue # spec 8.2 / 7: zone lacks a single-letter suffix → skip + region = zone_parts[0] is_regional = False elif scope_type == "regions": zone_name = None @@ -94,26 +122,38 @@ def find_unattached_disks( continue for disk in scope_disks.disks: + # spec 8.1: skip malformed records with absent / empty name + if not disk.name: + continue + + # spec 8.4: only READY disks are eligible if disk.status != "READY": continue - if disk.users: # non-empty = attached to one or more VMs + + # spec 8.5: users must be an explicit list — any other type + # (None, str, dict, tuple, …) means attachment state is unresolvable. + # An empty list is the only value that safely means "unattached". 
+ if not isinstance(disk.users, list): + continue + # spec 8.6: any current user entry means attached → skip + if disk.users: continue - # Extract short disk type from full resource URL + # Extract short disk type from full resource URL. # e.g. "zones/us-central1-a/diskTypes/pd-ssd" -> "pd-ssd" + # spec 7: fallback to "unknown" (not a guessed default type) disk_type_url = disk.type_ or "" - disk_type = disk_type_url.split("/")[-1] if disk_type_url else "pd-standard" + disk_type = disk_type_url.split("/")[-1] if disk_type_url else "unknown" - size_gb = int(disk.size_gb) if disk.size_gb is not None else 0 - cost_per_gb = _DISK_TYPE_COST_PER_GB.get(disk_type, _DEFAULT_COST_PER_GB) - monthly_cost = round(size_gb * cost_per_gb, 2) + # spec 7: parse size as non-negative int; use 0 on malformed values + try: + size_gb = int(disk.size_gb) if disk.size_gb is not None else 0 + except (ValueError, TypeError): + size_gb = 0 labels = dict(disk.labels) if disk.labels else {} # Regional disks use a different resource path than zonal disks. - # lastAttachTimestamp / lastDetachTimestamp are [Output Only] fields - # confirmed in GCP Disk API: - # https://cloud.google.com/compute/docs/reference/rest/v1/disks if is_regional: resource_id = f"projects/{project_id}/regions/{region}/disks/{disk.name}" report_location = region @@ -121,19 +161,16 @@ def find_unattached_disks( resource_id = f"projects/{project_id}/zones/{zone_name}/disks/{disk.name}" report_location = zone_name - # Confidence: regional disks are often intentionally provisioned for HA - # (replicated across two zones); an unattached regional disk is more - # ambiguous than an unattached zonal disk. + # spec 9.4: confidence baseline confidence = ConfidenceLevel.MEDIUM if is_regional else ConfidenceLevel.HIGH # Modulate confidence by time since last detach. - # A disk detached < 24h ago may be mid-pipeline (VM deleted, disk - # deletion pending). After 7 days the disk is almost certainly orphaned. 
- last_detach_str = disk.last_detach_timestamp or "" + # spec 7: treat non-string timestamps as unknown rather than crashing. + raw_ts = disk.last_detach_timestamp + last_detach_str = raw_ts if isinstance(raw_ts, str) else "" hours_since_detach: Optional[float] = None if last_detach_str: try: - # GCP uses RFC3339; handle both "+HH:MM" and "Z" offsets ts = last_detach_str.replace("Z", "+00:00") last_detach = datetime.fromisoformat(ts) if last_detach.tzinfo is None: @@ -142,17 +179,17 @@ def find_unattached_disks( if hours_since_detach < 24: confidence = ConfidenceLevel.LOW elif hours_since_detach < 7 * 24 and not is_regional: - # Zonal disk detached 24h–7d ago: still plausibly in a pipeline. - # Regional disks stay at their MEDIUM base regardless. + # Zonal disk detached 24h–7d: plausibly still mid-pipeline. + # Regional disks remain at MEDIUM baseline. confidence = ConfidenceLevel.MEDIUM - except ValueError: - pass + except (ValueError, AttributeError): + pass # unparseable timestamp — keep baseline confidence signals_used = [ "Disk status: READY", "No VM users (users list empty)", - f"Disk type: {disk_type} (~${cost_per_gb}/GB/month storage)", - f"Size: {size_gb} GB -> ~${monthly_cost}/month (estimated, region-dependent)", + f"Disk type: {disk_type}", + f"Size: {size_gb} GB", ] if is_regional: signals_used.append( @@ -161,15 +198,16 @@ def find_unattached_disks( if hours_since_detach is not None: signals_used.append(f"Last detached: {hours_since_detach:.0f}h ago") + # spec 9.5 / 10.2: never claim a specific dollar cost — pricing varies signals_not_checked = [ + "Exact monthly cost (varies by disk type, region, currency, and provisioned performance — see GCP billing)", "Disk reserved for imminent VM recreation", - "Snapshot-only workflow (intentional detachment)", + "Snapshot, image, or template restore dependency (disk may be intentionally detached)", "Cross-project disk sharing", ] if disk_type in _HYPERDISK_TYPES: signals_not_checked.append( - f"Hyperdisk IOPS 
and throughput charges are billed separately from " - f"capacity — actual monthly cost is likely higher than ~${monthly_cost}" + "Hyperdisk IOPS and throughput charges are billed separately from capacity cost" ) if disk_type == "pd-extreme": signals_not_checked.append( @@ -201,8 +239,7 @@ def find_unattached_disks( summary=( f"Persistent disk '{disk.name}' ({size_gb} GB, {disk_type}) " f"in {'region' if is_regional else 'zone'} '{report_location}' " - f"is not attached to any VM but continues to incur storage " - f"charges (~${monthly_cost}/month, estimated, region-dependent)." + f"is not attached to any VM but continues to incur storage charges." ), reason="Disk has no attached VM (users list is empty)", risk=RiskLevel.MEDIUM, @@ -214,7 +251,8 @@ def find_unattached_disks( time_window=None, ), details=details, - estimated_monthly_cost_usd=(monthly_cost if monthly_cost > 0 else None), + # spec 9.5.1: cost model is None — pricing varies by type/region/currency + estimated_monthly_cost_usd=None, ) ) diff --git a/cleancloud/providers/gcp/rules/ip_unused.py b/cleancloud/providers/gcp/rules/ip_unused.py index 71e7d85..8bfead3 100644 --- a/cleancloud/providers/gcp/rules/ip_unused.py +++ b/cleancloud/providers/gcp/rules/ip_unused.py @@ -1,3 +1,48 @@ +""" +Rule: gcp.compute.ip.unused + + (spec — docs/specs/gcp/ip_unused.md) + +Intent: + Detect static external IPv4 address reservations currently in RESERVED state + so they can be reviewed as conservative cleanup candidates. 
+ +Exclusions: + - address record malformed or name absent/empty (spec 8.1) + - regional aggregated scope key unsupported or malformed (spec 8.2) + - region filter set and normalized regional scope does not match (spec 8.3) + - address is global and a region filter is set (spec 8.4) + - status not exactly "RESERVED" (spec 8.5) + - addressType absent, unknown, or not exactly "EXTERNAL" (spec 8.6) + - ipVersion absent, unknown, or not exactly "IPV4" (spec 8.7) + - purpose == "NAT_AUTO" (spec 8.8) + - users[] resolves to one or more entries (spec 8.9) + +Detection: + - status == "RESERVED" + - addressType == "EXTERNAL" + - ipVersion == "IPV4" + - purpose != "NAT_AUTO" + - users[] empty or absent + - covers regional (regions/REGION) and global addresses + +Confidence (spec 10.1): + - HIGH for all findings + +Risk (spec 10.2): + - LOW for all findings + +Cost model (spec 9.7): + - estimated_monthly_cost_usd = 7.30 + - Derived from Google's documented $0.01/hour for unused static external IPv4 + × 730-hour normalized month. + +APIs: + - compute.addresses.list (via addresses.aggregatedList) + - compute.globalAddresses.list +""" + +import warnings from datetime import datetime, timezone from typing import List, Optional @@ -9,14 +54,8 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -# GCP charges for static IPs that are reserved but not attached to a running resource. -# PREMIUM network tier (default): $0.010/hour = ~$7.20/month per unused IP. -# STANDARD network tier: lower cost — verify current rate at -# https://cloud.google.com/vpc/network-pricing#ipaddress -# Global external IPs are always PREMIUM. Only regional IPs can be STANDARD. -# We use the PREMIUM rate as the conservative estimate for all tiers; when a -# regional IP is STANDARD, we note it in the finding so users can verify actual cost. 
-_UNUSED_IP_COST_USD_MONTH = 7.20 # PREMIUM tier reference +# spec 9.7: $0.01/hour × 730-hour normalized month = $7.30/month +_UNUSED_IP_COST_USD_MONTH = 7.30 def find_unused_static_ips( @@ -26,17 +65,13 @@ def find_unused_static_ips( region_filter: Optional[str] = None, ) -> List[Finding]: """ - Find reserved external IP addresses not attached to any resource. + Find reserved external IPv4 addresses not in use. - GCP bills ~$7.20/month for each static IP in RESERVED status (allocated but - not in use). These accumulate when VMs, load balancers, or NAT gateways are - deleted without releasing their reserved IPs. + GCP bills $7.30/month (estimated) for each static external IPv4 address in + RESERVED status. These accumulate when VMs, load balancers, or NAT gateways + are deleted without releasing their reserved IPs. - Covers both regional and global external IPs. - - Detection logic: - - Regional IP: status == RESERVED (not IN_USE) - - Global IP: status == RESERVED (not IN_USE) + Covers both regional and global external IPv4 addresses. IAM permissions required: - compute.addresses.list (included in roles/compute.viewer) @@ -45,86 +80,155 @@ def find_unused_static_ips( findings: List[Finding] = [] now = datetime.now(timezone.utc) - # --- Regional IPs --- + # --- Regional IPs (addresses.aggregatedList) --- # aggregated_list() returns a lazy pager — PermissionDenied fires during # iteration (not at call time), so the try/except must wrap the full loop. + # return_partial_success=True allows partial results when some scopes are + # unreachable rather than failing the entire call (spec 9.1.2). + # Response scope keys for addresses: "regions/REGION". 
+ # See: https://cloud.google.com/compute/docs/reference/rest/v1/addresses/aggregatedList addresses_client = compute_v1.AddressesClient(credentials=credentials) try: - for region_scope, region_addresses in addresses_client.aggregated_list(project=project_id): - if not region_addresses.addresses: - continue + pager = addresses_client.aggregated_list( + request={"project": project_id, "return_partial_success": True} + ) + for page in pager.pages: + # spec 9.1.6-7: surface top-level page warning — callers must not treat + # zero findings as proof of full clean coverage. + _page_warning = getattr(page, "warning", None) + if _page_warning and getattr(_page_warning, "code", ""): + warnings.warn( + f"gcp.compute.ip.unused: aggregated inventory returned a top-level warning " + f"(code: {_page_warning.code}) — regional address coverage may be incomplete", + UserWarning, + stacklevel=2, + ) - # region_scope is like "regions/us-central1" - region_name = region_scope.split("/")[-1] + # spec 9.1.6-7: surface unreachable scopes + for unreachable_scope in getattr(page, "unreachables", None) or []: + warnings.warn( + f"gcp.compute.ip.unused: aggregated inventory could not reach scope " + f"'{unreachable_scope}' — findings from this scope are unavailable", + UserWarning, + stacklevel=2, + ) - if region_filter and region_name != region_filter: - continue + for scope_key, scope_addresses in (getattr(page, "items", None) or {}).items(): + # spec 9.1.6-7: surface scope-level warning + _scope_warning = getattr(scope_addresses, "warning", None) + if _scope_warning and getattr(_scope_warning, "code", ""): + warnings.warn( + f"gcp.compute.ip.unused: aggregated inventory returned partial " + f"coverage for scope '{scope_key}' " + f"(code: {_scope_warning.code}) — findings from this scope may be incomplete", + UserWarning, + stacklevel=2, + ) - for address in region_addresses.addresses: - if address.status != "RESERVED": - continue # IN_USE — attached to a resource - # Only EXTERNAL IPs 
incur the static IP reservation charge. - # INTERNAL addresses use VPC subnet allocation and are not billed this way. - if address.address_type and address.address_type != "EXTERNAL": + if not scope_addresses.addresses: continue - labels = dict(address.labels) if address.labels else {} - network_tier = address.network_tier or "PREMIUM" - - regional_signals_not_checked = [ - "IP held for imminent re-attachment", - "Compliance or security requirement to hold specific IP", - ] - if network_tier == "STANDARD": - regional_signals_not_checked.append( - "STANDARD tier IPs cost less than PREMIUM — cost shown is the " - "PREMIUM reference rate; verify actual rate at " - "cloud.google.com/vpc/network-pricing#ipaddress" - ) + # spec 8.2 / 7: supported form is exactly "regions/REGION" + scope_parts = scope_key.split("/") + if len(scope_parts) != 2 or scope_parts[0] != "regions": + continue # skip "global" and any other unexpected scope types + + region_name = scope_parts[1] + + # spec 8.3 + if region_filter and region_name != region_filter: + continue + + for address in scope_addresses.addresses: + # spec 8.1: skip malformed records with absent / empty name + if not address.name: + continue + + # spec 8.5: only RESERVED addresses are eligible + if address.status != "RESERVED": + continue + + # spec 8.6: addressType absent, unknown, or not exactly "EXTERNAL" → skip + if address.address_type != "EXTERNAL": + continue + + # spec 8.7: ipVersion absent, unknown, or not exactly "IPV4" → skip + if address.ip_version != "IPV4": + continue - findings.append( - Finding( - provider="gcp", - rule_id="gcp.compute.ip.unused", - resource_type="gcp.compute.address", - resource_id=f"projects/{project_id}/regions/{region_name}/addresses/{address.name}", - region=region_name, - title="Unused Reserved External IP", - summary=( - f"Regional static IP '{address.name}' ({address.address}) in " - f"'{region_name}' is reserved but not attached to any resource, " - f"billing 
~${_UNUSED_IP_COST_USD_MONTH}/month (estimated)." - ), - reason="IP address status is RESERVED — not attached to any VM, LB, or NAT gateway", - risk=RiskLevel.LOW, - confidence=ConfidenceLevel.HIGH, - detected_at=now, - evidence=Evidence( - signals_used=[ - "Address status: RESERVED (not IN_USE)", - f"Address type: {address.address_type or 'EXTERNAL'}", - f"Network tier: {network_tier}", - f"IP: {address.address}", - f"~${_UNUSED_IP_COST_USD_MONTH}/month (PREMIUM tier reference, estimated)", - ], - signals_not_checked=regional_signals_not_checked, - time_window=None, - ), - details={ - "address_name": address.name, - "ip_address": address.address, - "address_type": address.address_type or "EXTERNAL", - "purpose": address.purpose or None, - "region": region_name, - "scope": "regional", - "is_regional": True, - "network_tier": network_tier, - "creation_timestamp": address.creation_timestamp or None, - "labels": labels, - }, - estimated_monthly_cost_usd=_UNUSED_IP_COST_USD_MONTH, + # spec 8.8: NAT_AUTO addresses are Cloud NAT automatic allocations, + # not customer-held unused reservations + if address.purpose == "NAT_AUTO": + continue + + # spec 8.9: non-empty users[] is contradictory current-use evidence + if address.users: + continue + + labels = dict(address.labels) if address.labels else {} + # spec 7: preserve exact documented value; do not guess if absent + network_tier = address.network_tier or None + + signals_not_checked = [ + "IP held for imminent re-attachment or manual failover", + "DNS, firewall allowlist, or customer integration dependencies", + "Operational reserve, cutover, or HA intent", + "Contract-specific or non-USD billing differences", + ] + if network_tier == "STANDARD": + signals_not_checked.append( + "STANDARD tier IPs cost less than PREMIUM — cost shown is the " + "PREMIUM reference rate; verify actual rate at " + "cloud.google.com/vpc/network-pricing#ipaddress" + ) + + findings.append( + Finding( + provider="gcp", + 
rule_id="gcp.compute.ip.unused", + resource_type="gcp.compute.address", + resource_id=f"projects/{project_id}/regions/{region_name}/addresses/{address.name}", + region=region_name, + title="Unused Reserved External IP", + summary=( + f"Regional static IPv4 address '{address.name}' ({address.address}) in " + f"'{region_name}' is reserved but not attached to any resource, " + f"billing ~${_UNUSED_IP_COST_USD_MONTH}/month (estimated, " + f"public USD list pricing at $0.01/hr)." + ), + reason="Address status is RESERVED and no contradictory current-use evidence was found", + risk=RiskLevel.LOW, + confidence=ConfidenceLevel.HIGH, + detected_at=now, + evidence=Evidence( + signals_used=[ + "Address status: RESERVED (not IN_USE)", + "Address type: EXTERNAL", + "IP version: IPv4", + "Scope: regional", + f"Network tier: {network_tier or 'unknown'}", + f"IP: {address.address}", + f"~${_UNUSED_IP_COST_USD_MONTH}/month (estimated public USD list price at $0.01/hr × 730h)", + ], + signals_not_checked=signals_not_checked, + time_window=None, + ), + details={ + "address_name": address.name, + "ip_address": address.address or None, + "scope": "regional", + "is_regional": True, + "address_type": address.address_type, + "ip_version": address.ip_version, + "purpose": address.purpose or None, + "network_tier": network_tier, + "region": region_name, + "creation_timestamp": address.creation_timestamp or None, + "labels": labels, + }, + estimated_monthly_cost_usd=_UNUSED_IP_COST_USD_MONTH, + ) ) - ) except (PermissionDenied, Forbidden) as e: raise PermissionError( @@ -135,22 +239,43 @@ def find_unused_static_ips( # Compute Engine API not enabled for this project — return empty return findings - # --- Global IPs --- - # Graceful degradation: if global IP permission is denied, return regional findings - # rather than failing the entire rule. Global IPs are less common and the caller - # already has actionable regional results. 
+ # spec 8.4 / 9.6: global addresses have no regional scope — skip when region filter active if region_filter: - # Global IPs have no region — skip when region filter is active return findings + # --- Global IPs (globalAddresses.list) --- global_addresses_client = compute_v1.GlobalAddressesClient(credentials=credentials) try: for address in global_addresses_client.list(project=project_id): + # spec 8.1 + if not address.name: + continue + + # spec 8.5 if address.status != "RESERVED": - continue # IN_USE + continue + + # spec 8.6 + if address.address_type != "EXTERNAL": + continue + + # spec 8.7 + if address.ip_version != "IPV4": + continue + + # spec 8.8 + if address.purpose == "NAT_AUTO": + continue + + # spec 8.9 + if address.users: + continue labels = dict(address.labels) if address.labels else {} - # Global IPs are always PREMIUM tier (regional-only IPs can be STANDARD) + # spec 7: preserve exact documented value; do not guess if absent. + # GCP documents global IPs are always PREMIUM — the API normally returns + # "PREMIUM", but store None rather than fabricating a value when absent. + network_tier = address.network_tier or None findings.append( Finding( @@ -161,36 +286,41 @@ def find_unused_static_ips( region="global", title="Unused Reserved Global IP", summary=( - f"Global static IP '{address.name}' ({address.address}) is reserved " + f"Global static IPv4 address '{address.name}' ({address.address}) is reserved " f"but not attached to any resource, billing ~${_UNUSED_IP_COST_USD_MONTH}/month " - f"(estimated)." + f"(estimated, public USD list pricing at $0.01/hr)." 
), - reason="Global IP address status is RESERVED — not attached to any load balancer", + reason="Global address status is RESERVED and no contradictory current-use evidence was found", risk=RiskLevel.LOW, confidence=ConfidenceLevel.HIGH, detected_at=now, evidence=Evidence( signals_used=[ "Address status: RESERVED (not IN_USE)", + "Address type: EXTERNAL", + "IP version: IPv4", "Scope: global", - "Network tier: PREMIUM (global IPs are always PREMIUM)", + f"Network tier: {network_tier or 'unknown'} (global IPs are documented as always PREMIUM by GCP)", f"IP: {address.address}", - f"~${_UNUSED_IP_COST_USD_MONTH}/month (PREMIUM tier reference, estimated)", + f"~${_UNUSED_IP_COST_USD_MONTH}/month (estimated public USD list price at $0.01/hr × 730h)", ], signals_not_checked=[ - "IP held for imminent load balancer creation", - "Compliance or security requirement to hold specific IP", + "IP held for imminent re-attachment or manual failover", + "DNS, firewall allowlist, or customer integration dependencies", + "Operational reserve, cutover, or HA intent", + "Contract-specific or non-USD billing differences", ], time_window=None, ), details={ "address_name": address.name, - "ip_address": address.address, - "address_type": address.address_type or "EXTERNAL", - "purpose": address.purpose or None, + "ip_address": address.address or None, "scope": "global", "is_regional": False, - "network_tier": "PREMIUM", + "address_type": address.address_type, + "ip_version": address.ip_version, + "purpose": address.purpose or None, + "network_tier": network_tier, "creation_timestamp": address.creation_timestamp or None, "labels": labels, }, @@ -198,8 +328,14 @@ def find_unused_static_ips( ) ) - except (PermissionDenied, Forbidden, NotFound): - # Partial degradation: return regional findings even if global IPs are inaccessible + except (PermissionDenied, Forbidden) as e: + # spec 9.8.2: global permission failures must surface as a permission error + # during full-scope scans; silent 
degradation to regional-only is not acceptable. + raise PermissionError( + f"compute.globalAddresses.list permission required (roles/compute.viewer): " + f"{getattr(e, 'message', str(e))}" + ) from e + except NotFound: pass return findings diff --git a/cleancloud/providers/gcp/rules/snapshot_old.py b/cleancloud/providers/gcp/rules/snapshot_old.py index 9a6af6b..1bd8bd9 100644 --- a/cleancloud/providers/gcp/rules/snapshot_old.py +++ b/cleancloud/providers/gcp/rules/snapshot_old.py @@ -1,5 +1,45 @@ +""" +Rule: gcp.compute.snapshot.old + + (spec — docs/specs/gcp/snapshot_old.md) + +Intent: + Detect old standard snapshot resources that are conservative cleanup review + candidates after excluding stronger Google-documented signals that the + snapshot is part of an intentional automated backup workflow. + +Exclusions: + - snapshot record malformed or name absent/empty (spec 8.1) + - status not exactly "READY" (spec 8.2) + - creationTimestamp absent or unparsable (spec 8.3) + - age_days < max_age_days (spec 8.4) + - snapshotType == "ARCHIVE" (spec 8.5) + - sourceSnapshotSchedulePolicy or sourceSnapshotSchedulePolicyId present + and non-empty (spec 8.6) + - autoCreated == True (spec 8.7) + +Detection: + - status == "READY" + - creationTimestamp parsable + - age_days >= max_age_days + - not archive, not schedule-created, not auto-created + +Confidence (spec 9.8): + - LOW for all findings + +Risk (spec 9.9): + - LOW for all findings + +Cost model (spec 9.7): + - estimated_monthly_cost_usd = None + - No flat per-GB rate; pricing varies by snapshot type and storage location. 
+ +APIs: + - compute.snapshots.list +""" + import re -from datetime import datetime, timedelta, timezone +from datetime import datetime, timezone from typing import List, Optional from google.api_core.exceptions import Forbidden, NotFound, PermissionDenied @@ -10,10 +50,6 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -# GCP snapshot storage pricing: ~$0.026/GB/month (multi-regional standard) -# Source: https://cloud.google.com/compute/disks-image-pricing#disk_snapshots -_SNAPSHOT_COST_PER_GB_MONTH = 0.026 - _BYTES_PER_GB = 1024**3 @@ -37,31 +73,20 @@ def find_old_snapshots( max_age_days: int = 90, ) -> List[Finding]: """ - Find disk snapshots older than 90 days. + Find disk snapshots older than max_age_days that are not part of automated + backup workflows. - GCP snapshots are stored in Cloud Storage and billed at ~$0.026/GB/month. - Snapshots accumulate silently — automated snapshot policies are frequently - removed while their snapshots are left behind, and one-off manual snapshots - are rarely cleaned up. 90 days is a reliable threshold that avoids flagging - routine backup cycles while catching chronic waste. + Old snapshots accumulate silently after VM deletion, manual one-off backups, + or abandoned snapshot schedules. This rule excludes archive, schedule-created, + and auto-created snapshots as stronger signals of intentional backup workflows. - Confidence is HIGH when the source disk no longer exists (clear orphan), - MEDIUM when the source disk is still present (might be intentional long-term - backup or DR snapshot). - - Snapshots are global resources — region_filter is not applied (they have no - region; source disk zone is an unreliable proxy for filtering intent). - - Detection logic: - - Snapshot creation timestamp older than `max_age_days` days - - Snapshot status == READY + Snapshots are global resources — region_filter is ignored (spec 9.1.3). 
IAM permissions required: - compute.snapshots.list (included in roles/compute.viewer) """ findings: List[Finding] = [] now = datetime.now(timezone.utc) - cutoff = now - timedelta(days=max_age_days) snapshots_client = compute_v1.SnapshotsClient(credentials=credentials) @@ -69,71 +94,125 @@ def find_old_snapshots( # (not at call time), so the try/except must wrap the full loop. try: for snapshot in snapshots_client.list(project=project_id): + # spec 8.1: skip malformed records with absent / empty name + if not snapshot.name: + continue + + # spec 8.2 / 9.2: only READY snapshots are stably evaluable if snapshot.status != "READY": continue + # spec 8.3 / 9.3: creationTimestamp must be parsable; skip if absent or malformed created_at = _parse_gcp_timestamp(snapshot.creation_timestamp or "") - if created_at is None or created_at > cutoff: + if created_at is None: + continue + + # spec 8.4 / 9.3: emit only when age_days >= max_age_days + age_days = (now - created_at).days + if age_days < max_age_days: + continue + + # spec 8.5 / 9.5: archive snapshots are a low-cost long-retention class — skip + if snapshot.snapshot_type == "ARCHIVE": + continue + + # spec 8.6 / 9.4: schedule-created snapshots are intentional recurring backups + # not in (None, "") is more explicit than truthiness: empty string is "not present" + if snapshot.source_snapshot_schedule_policy not in (None, ""): + continue + if snapshot.source_snapshot_schedule_policy_id not in (None, ""): continue - max_age_days_actual = (now - created_at).days + # spec 8.7 / 9.4: auto-created snapshots are intentional backups + if getattr(snapshot, "auto_created", None): + continue - # Empty source_disk means the source disk has been deleted — clear orphan - source_disk = snapshot.source_disk or "" - source_disk_deleted = not bool(source_disk) + # Normalize storage fields for context only (spec 9.7 / 7) + # spec 7: non-negative integer; negative values normalize to 0 + try: + disk_size_gb = max(0, 
int(snapshot.disk_size_gb)) if snapshot.disk_size_gb else 0 + except (ValueError, TypeError): + disk_size_gb = 0 - # Higher confidence when source disk is gone (orphaned snapshot) - confidence = ConfidenceLevel.HIGH if source_disk_deleted else ConfidenceLevel.MEDIUM + try: + storage_bytes = max(0, int(snapshot.storage_bytes)) if snapshot.storage_bytes else 0 + except (ValueError, TypeError): + storage_bytes = 0 + + # spec 7: preserve exact documented values; None when absent + storage_bytes_status = snapshot.storage_bytes_status or None + snapshot_type = snapshot.snapshot_type or None + # getattr preserves explicit None/unknown on malformed or minimal mock objects + auto_created = getattr(snapshot, "auto_created", None) + # resolve chain_name: prefer SDK snake_case; camelCase as fallback for raw objects + chain_name = ( + getattr(snapshot, "chain_name", None) or getattr(snapshot, "chainName", None) or "" + ) - # Use actual compressed storage bytes if available; fall back to disk size - storage_bytes = int(snapshot.storage_bytes or 0) - disk_size_gb = int(snapshot.disk_size_gb or 0) - billable_gb = (storage_bytes / _BYTES_PER_GB) if storage_bytes > 0 else disk_size_gb - monthly_cost = round(billable_gb * _SNAPSHOT_COST_PER_GB_MONTH, 2) + # spec 9.10: malformed context fields must not fail the whole rule + try: + labels = dict(snapshot.labels) if snapshot.labels else {} + except Exception: + labels = {} - labels = dict(snapshot.labels) if snapshot.labels else {} + try: + storage_locations = ( + list(snapshot.storage_locations) if snapshot.storage_locations else [] + ) + except Exception: + storage_locations = [] - signals = [ - f"Snapshot age: {max_age_days_actual} days (created {created_at.date().isoformat()})", + # spec 10.2: signals_used must disclose status, age, threshold, storage context + signals_used = [ "Status: READY", - f"Disk size: {disk_size_gb} GB", + f"Snapshot age: {age_days} days (threshold: {max_age_days} days)", + f"Created: 
{created_at.date().isoformat()}", ] - if storage_bytes > 0: - signals.append( - f"Actual stored size: {billable_gb:.1f} GB -> ~${monthly_cost}/month" - ) - else: - signals.append(f"Estimated cost: ~${monthly_cost}/month (disk size used as proxy)") - if source_disk_deleted: - signals.append( - "Source disk reference missing — likely orphaned snapshot " - "(GCP clears sourceDisk when the backing disk is deleted)" - ) - else: - signals.append(f"Source disk: {source_disk.split('/')[-1]}") + if snapshot_type: + signals_used.append(f"Snapshot type: {snapshot_type}") + # spec 3.3 / 9.7: diskSizeGb is source disk size, not billed snapshot size + signals_used.append( + f"Source disk size: {disk_size_gb} GB (not the billed snapshot storage size)" + ) + # spec 9.7: storageBytes as context only, including when very small or zero + storage_gb = storage_bytes / _BYTES_PER_GB + status_note = f" ({storage_bytes_status})" if storage_bytes_status else "" + signals_used.append( + f"Billed storage (storageBytes): {storage_gb:.1f} GB{status_note} — context only; " + f"deleting may not reclaim full amount due to incremental sharing" + ) + if chain_name: + signals_used.append(f"Snapshot is part of a named incremental chain: {chain_name}") - details = { + # spec 10.3: required details fields + details: dict = { "snapshot_name": snapshot.name, + "created_at": created_at.isoformat(), + "age_days": age_days, + "max_age_days_threshold": max_age_days, "disk_size_gb": disk_size_gb, "storage_bytes": storage_bytes, - "max_age_days": max_age_days_actual, - "max_age_days_threshold": max_age_days, - "created_at": created_at.isoformat(), - "source_disk_deleted": source_disk_deleted, - # storage_locations: ["us-central1"] = regional, ["us"] = multi-regional. - # Affects pricing — multi-regional ($0.026/GB) costs more than regional. 
- "storage_locations": ( - list(snapshot.storage_locations) if snapshot.storage_locations else [] - ), + "storage_bytes_status": storage_bytes_status, + "storage_locations": storage_locations, + "snapshot_type": snapshot_type, + "auto_created": auto_created, "labels": labels, } - if not source_disk_deleted: - details["source_disk"] = source_disk.split("/")[-1] - details["source_disk_url"] = source_disk # full URL for cross-project lookup + # Conditionally include optional fields when present (spec 10.3) + if snapshot.source_snapshot_schedule_policy not in (None, ""): + details["source_snapshot_schedule_policy"] = ( + snapshot.source_snapshot_schedule_policy + ) + if snapshot.source_snapshot_schedule_policy_id not in (None, ""): + details["source_snapshot_schedule_policy_id"] = ( + snapshot.source_snapshot_schedule_policy_id + ) + if snapshot.source_disk: + details["source_disk"] = snapshot.source_disk if snapshot.source_disk_id: - details["source_disk_id"] = snapshot.source_disk_id # stable numeric ID - if snapshot.chain_name: - details["chain_name"] = snapshot.chain_name # non-empty only when explicitly set + details["source_disk_id"] = snapshot.source_disk_id + if chain_name: + details["chain_name"] = chain_name findings.append( Finding( @@ -142,31 +221,34 @@ def find_old_snapshots( resource_type="gcp.compute.snapshot", resource_id=f"projects/{project_id}/global/snapshots/{snapshot.name}", region="global", - title=f"Old Disk Snapshot ({max_age_days_actual} Days)", + title=f"Old Disk Snapshot ({age_days} Days)", summary=( - f"Snapshot '{snapshot.name}' ({disk_size_gb} GB) is {max_age_days_actual} days old" - + (" and its source disk no longer exists." if source_disk_deleted else ".") - + f" Estimated storage cost: ~${monthly_cost}/month." + f"Snapshot '{snapshot.name}' is {age_days} days old " + f"and has not been identified as part of an automated backup workflow." 
+ ), + reason=( + f"Snapshot is {age_days} days old (threshold: {max_age_days} days) " + f"and no schedule-created or auto-created evidence was found" ), - reason=f"Snapshot is {max_age_days_actual} days old (threshold: {max_age_days} days)", risk=RiskLevel.LOW, - confidence=confidence, + confidence=ConfidenceLevel.LOW, detected_at=now, evidence=Evidence( - signals_used=signals, + signals_used=signals_used, signals_not_checked=[ - "Compliance or regulatory data retention requirements", - "Disaster recovery snapshot policy", - "Part of an active backup rotation", - "Snapshot storage is incremental — deleting this snapshot may not " - "fully reclaim its estimated cost if adjacent snapshots share blocks", - "Snapshot storage location (regional vs multi-regional) may affect " - "pricing (rule uses multi-regional rate of $0.026/GB/month)", + "Business or application retention intent", + "DR, audit, or compliance requirements", + "Snapshot restore frequency or operational usage was not evaluated", + "Whether deleting this snapshot would materially reduce billed " + "storage (snapshots are incremental — adjacent snapshots may " + "share data blocks)", + "Exact monthly pricing from current storage location and snapshot type", ], time_window=f"{max_age_days} days", ), details=details, - estimated_monthly_cost_usd=(monthly_cost if monthly_cost > 0 else None), + # spec 9.7: no flat per-GB estimate; pricing varies by type and location + estimated_monthly_cost_usd=None, ) ) diff --git a/cleancloud/providers/gcp/rules/sql_instance_idle.py b/cleancloud/providers/gcp/rules/sql_instance_idle.py index 7926c71..81062b4 100644 --- a/cleancloud/providers/gcp/rules/sql_instance_idle.py +++ b/cleancloud/providers/gcp/rules/sql_instance_idle.py @@ -1,6 +1,52 @@ +""" +Rule: gcp.sql.instance.idle + + (spec — docs/specs/gcp/sql_instance_idle.md) + +Intent: + Detect primary Cloud SQL instances that show no observed active database + connections for the full configured idle window and 
therefore represent + conservative review candidates for cleanup, stop/start reconsideration, + or rightsizing. + + This is a conservative review-candidate rule only. It is not proof that + the instance is safe to delete, not proof that no business continuity + purpose exists, and not proof of a specific monthly saving. + +Exclusions: + - instance record malformed or name absent / empty (spec 8.1) + - region absent / empty (spec 8.2) + - region filter set and region does not match (spec 8.3) + - state absent, unknown, or not exactly "RUNNABLE" (spec 8.4) + - instanceType absent, unknown, or not exactly "CLOUD_SQL_INSTANCE" (spec 8.5) + - replica exclusion contract triggered (masterInstanceName present) (spec 8.6) + - createTime absent or unparsable (spec 8.7) + - instance newer than window_start (spec 8.8) + - active_connections metric cannot be resolved reliably (spec 8.9) + - active_connections_max > 0 anywhere in the window (spec 8.10) + +Detection: + - state == "RUNNABLE" + - instanceType == "CLOUD_SQL_INSTANCE" + - masterInstanceName absent / empty + - createTime parsable and instance older than window_start + - active_connections_max == 0 for the full window + +Cost model (spec 9.10): + estimated_monthly_cost_usd = None + Pricing varies by edition, region, compute shape, HA, storage, and + commitment model; no flat tier estimate is appropriate. 
+ +APIs: + - sqladmin.googleapis.com/sql/v1beta4/projects/{project}/instances + - monitoring.googleapis.com: cloudsql.googleapis.com/database/active_connections + on cloudsql_database monitored resource +""" + from datetime import datetime, timedelta, timezone from typing import List, Optional +from google.api_core.exceptions import Forbidden, PermissionDenied from google.auth.transport.requests import AuthorizedSession from google.cloud import monitoring_v3 from google.protobuf import timestamp_pb2 @@ -10,37 +56,37 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -# Approximate Cloud SQL monthly cost by machine tier (us-central1, HA disabled) -# Source: https://cloud.google.com/sql/pricing -_CLOUD_SQL_COST_USD: dict = { - "db-f1-micro": 7.67, - "db-g1-small": 25.22, - "db-n1-standard-1": 46.55, - "db-n1-standard-2": 93.10, - "db-n1-standard-4": 186.19, - "db-n1-standard-8": 372.39, - "db-n1-standard-16": 744.78, - "db-n1-highmem-2": 113.45, - "db-n1-highmem-4": 226.90, - "db-n1-highmem-8": 453.80, - "db-n1-highmem-16": 907.60, - "db-custom-1-3840": 53.52, - "db-custom-2-7680": 107.04, - "db-custom-4-15360": 214.08, -} - -_DAYS_IDLE = 14 +# spec 6.3 / 9.5: Cloud SQL metrics are sampled every 60s and can be delayed +# up to 165s. A 5-minute buffer conservatively covers documented visibility lag. +_MONITORING_LAG_BUFFER = timedelta(minutes=5) + +# spec 9.6.9: coverage quality thresholds. +# Maximum tolerated consecutive gap between observed data points. Accounts for +# the documented 60 s sampling period + up to 165 s visibility lag, plus a +# conservative buffer for occasional missed samples. +_MAX_COVERAGE_GAP = timedelta(minutes=10) +# Tolerated offset between window boundary and first/last observed point. +# Accounts for sampling alignment and in-flight visibility lag at window edges. 
+_COVERAGE_EDGE_TOLERANCE = timedelta(minutes=10) + + +def _parse_create_time(ts: str) -> Optional[datetime]: + """Parse an RFC3339 createTime string to a UTC-aware datetime, or return None.""" + if not ts: + return None + try: + dt = datetime.fromisoformat(ts.replace("Z", "+00:00")) + return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc) + except (ValueError, AttributeError): + return None def _list_sql_instances(project_id: str, credentials) -> list: """ List all Cloud SQL instances using the Cloud SQL Admin REST API. - Uses AuthorizedSession (google-auth) — automatically handles token refresh - and avoids requiring google-api-python-client as an additional dependency. - - Raises PermissionError on 403 so the caller can gracefully skip this rule. - Returns [] on 404 (Cloud SQL API not enabled for the project). + Raises PermissionError on 403 (spec 9.13.1). + Returns [] on 404 (Cloud SQL API not enabled — spec 9.13.3). """ session = AuthorizedSession(credentials) resp = session.get( @@ -49,57 +95,132 @@ def _list_sql_instances(project_id: str, credentials) -> list: if resp.status_code == 403: raise PermissionError("cloudsql.instances.list permission required (roles/cloudsql.viewer)") if resp.status_code == 404: - return [] # Cloud SQL API not enabled for this project + return [] resp.raise_for_status() return resp.json().get("items", []) -def _has_connections( +def _query_active_connections( monitoring_client: monitoring_v3.MetricServiceClient, project_id: str, instance_name: str, - idle_days: int = _DAYS_IDLE, -) -> bool: + instance_region: str, + window_start: datetime, + window_end: datetime, +) -> Optional[float]: """ - Query Cloud Monitoring for database connections over the last `idle_days` days. + Query cloudsql.googleapis.com/database/active_connections for one instance. + + Matches by exact documented cloudsql_database monitored-resource identity + labels (project_id, location, resource_id). 
Aggregates across all matched + series to handle the database label dimension (spec 9.6–9.7). + + Also evaluates coverage quality (spec 9.6.8–9.6.9): the observed timestamps + must span the full window within _COVERAGE_EDGE_TOLERANCE, and no consecutive + gap between observed points may exceed _MAX_COVERAGE_GAP. - Returns True if any connections detected (active instance). - Returns True on any error — conservative fallback avoids false positives. + Returns: + float >= 0.0 — active_connections_max; 0.0 means confirmed idle + None — unresolved coverage (no series / no points / partial + window / large gap / unreadable timestamps / failure); + caller must skip (spec 8.9) + + Raises: + PermissionError — monitoring.timeSeries.list permission denied (spec 9.13.2) """ try: - now = datetime.now(timezone.utc) - start = now - timedelta(days=idle_days) - end_ts = timestamp_pb2.Timestamp() - end_ts.FromDatetime(now) + end_ts.FromDatetime(window_end) start_ts = timestamp_pb2.Timestamp() - start_ts.FromDatetime(start) + start_ts.FromDatetime(window_start) interval = monitoring_v3.TimeInterval(start_time=start_ts, end_time=end_ts) + # spec 9.6.3: exact label matching — project_id, location, resource_id + filter_str = ( + 'metric.type="cloudsql.googleapis.com/database/active_connections"' + ' AND resource.type="cloudsql_database"' + f' AND resource.labels.project_id="{project_id}"' + f' AND resource.labels.location="{instance_region}"' + f' AND resource.labels.resource_id="{instance_name}"' + ) + results = monitoring_client.list_time_series( request={ "name": f"projects/{project_id}", - "filter": ( - 'metric.type="cloudsql.googleapis.com/database/network/connections"' - f' AND resource.labels.database_id="{project_id}:{instance_name}"' - ), + "filter": filter_str, "interval": interval, "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL, } ) + # spec 9.7: aggregate across all matched series (all database label variants) + has_series = False + has_points = False + 
max_val = 0.0 + all_timestamps: list = [] + for series in results: + has_series = True for point in series.points: - val = point.value.int64_value or int(point.value.double_value or 0) - if val > 0: - return True - - return False # No connections detected over the window - + has_points = True + which = point.value.WhichOneof("value") + if which == "int64_value": + val = float(point.value.int64_value) + elif which == "double_value": + val = float(point.value.double_value) + else: + # Unrecognised or unset value type → unresolved coverage → skip + return None + if val > max_val: + max_val = val + # Collect timestamp for coverage quality evaluation (spec 9.6.8–9.6.9). + # Any parse failure means coverage cannot be verified → unresolved. + try: + ts = point.interval.end_time + all_timestamps.append( + datetime.fromtimestamp(ts.seconds + ts.nanos / 1e9, tz=timezone.utc) + ) + except Exception: + # spec 9.6.8: parse failure → unresolved coverage → skip + return None + + if not has_series: + # spec 9.6.7: no time series → unresolved coverage → skip + return None + if not has_points: + # spec 9.6.8: series present but no data points → unusable → skip + return None + if not all_timestamps: + # no readable timestamps → cannot verify coverage → skip + return None + + # spec 9.6.9: coverage quality — partial-window or materially sparse → skip + # Deduplicate before the gap check so identical timestamps from multiple + # series don't produce spurious zero-length intervals. 
+ all_timestamps = sorted(set(all_timestamps)) + if all_timestamps[0] > window_start + _COVERAGE_EDGE_TOLERANCE: + # data starts too late — partial window coverage + return None + if all_timestamps[-1] < window_end - _COVERAGE_EDGE_TOLERANCE: + # data ends too early — partial window coverage + return None + for i in range(1, len(all_timestamps)): + if all_timestamps[i] - all_timestamps[i - 1] > _MAX_COVERAGE_GAP: + # large missing chunk in the middle of the window + return None + + return max_val + + except (PermissionDenied, Forbidden) as e: + # spec 9.13.2: monitoring permission failures must surface as permission error + raise PermissionError( + f"monitoring.timeSeries.list permission required (roles/monitoring.viewer): " + f"{getattr(e, 'message', str(e))}" + ) from e except Exception: - # Monitoring unavailable or permission denied — conservative: assume active - return True + # all other failures → unresolved coverage → skip (spec 9.13.5) + return None def find_idle_sql_instances( @@ -107,26 +228,15 @@ def find_idle_sql_instances( project_id: str, credentials, region_filter: Optional[str] = None, - idle_days: int = _DAYS_IDLE, + idle_days: int = 14, ) -> List[Finding]: """ - Find Cloud SQL instances with zero database connections for `idle_days` days. - - Cloud SQL bills continuously regardless of query load — an idle db-n1-standard-2 - costs ~$93/month with zero queries. Dev and staging databases are frequently - left running after feature branches merge or projects wind down. - - Only RUNNABLE instances are evaluated. Read replicas are excluded (no - independent billing — master instance cost is what matters). Instances in - SUSPENDED, FAILED, or MAINTENANCE states are skipped. - - Monitoring errors are treated conservatively: if Cloud Monitoring is - unavailable or permission-denied, the instance is assumed active (not flagged). + Find Cloud SQL instances with zero active connections for idle_days days. 
- Detection logic: - - Instance state == RUNNABLE - - Not a read replica (instanceType != READ_REPLICA_INSTANCE) - - Cloud Monitoring: max connections == 0 over last 14 days + Detection requires active_connections_max == 0 for the full observation + window on cloudsql.googleapis.com/database/active_connections matched by + exact cloudsql_database identity labels. Unresolved metric coverage causes + the instance to be skipped rather than flagged. IAM permissions required: - cloudsql.instances.list (roles/cloudsql.viewer) @@ -135,126 +245,153 @@ def find_idle_sql_instances( findings: List[Finding] = [] now = datetime.now(timezone.utc) - # PermissionError propagates to scan.py which records it as a skipped rule + # spec 6.3: window_end with lag buffer; window_start = window_end - idle_days + window_end = now - _MONITORING_LAG_BUFFER + window_start = window_end - timedelta(days=idle_days) + + # PermissionError propagates (spec 9.13.1) instances = _list_sql_instances(project_id, credentials) if not instances: return findings - # If Cloud Monitoring client cannot be created, skip rather than false-positive + # If monitoring client cannot be created, skip rather than false-positive (spec 9.13.4) try: monitoring_client = monitoring_v3.MetricServiceClient(credentials=credentials) except Exception: return findings for instance in instances: - state = instance.get("state", "") - if state != "RUNNABLE": - continue - - # Exclude read replicas — no independent cost basis - if instance.get("instanceType") == "READ_REPLICA_INSTANCE": + # spec 8.1: name must be present and non-empty + instance_name = instance.get("name", "") + if not instance_name: continue - instance_name = instance.get("name", "") + # spec 8.2: region must be present and non-empty region = instance.get("region", "") - database_version = instance.get("databaseVersion", "") - tier = (instance.get("settings") or {}).get("tier", "") + if not region: + continue + # spec 8.3: region filter — exact string equality if 
region_filter and region != region_filter: continue - # Skip instances created within the last 24 hours — zero connections on a - # brand-new instance is not a signal of waste. createTime is ISO 8601 UTC. - create_time_str = instance.get("createTime", "") - if create_time_str: - try: - created_at = datetime.fromisoformat(create_time_str.replace("Z", "+00:00")) - if created_at.tzinfo is None: - created_at = created_at.replace(tzinfo=timezone.utc) - if (now - created_at).total_seconds() < 86400: - continue - except ValueError: - pass - - # Conservative: if monitoring check fails, assume active — don't flag - if _has_connections(monitoring_client, project_id, instance_name, idle_days=idle_days): + # spec 8.4: only RUNNABLE is eligible + if instance.get("state") != "RUNNABLE": continue - settings = instance.get("settings") or {} + # spec 8.5: only primary CLOUD_SQL_INSTANCE is eligible + instance_type = instance.get("instanceType", "") + if instance_type != "CLOUD_SQL_INSTANCE": + continue - monthly_cost = _CLOUD_SQL_COST_USD.get(tier) - cost_signal = ( - f"Tier '{tier}' costs ~${monthly_cost}/month (compute only, no HA)" - if monthly_cost - else f"Tier: {tier or 'unknown'} (cost estimate unavailable)" - ) + # spec 8.6 / 9.4: replica exclusion — masterInstanceName present and non-empty + master_instance_name = instance.get("masterInstanceName", "") + if master_instance_name: + continue - # Zero connections for idle_days is a reliable signal regardless of cost. - # Use min_cost in cleancloud.yaml to suppress low-value findings instead. - confidence = ConfidenceLevel.HIGH + # spec 8.7 / 9.5: createTime must be parsable + created_at = _parse_create_time(instance.get("createTime", "")) + if created_at is None: + continue # absent or unparsable → skip - labels = settings.get("userLabels", {}) + # spec 8.8 / 9.5: instance must be old enough for the full observation window + if created_at > window_start: + continue - # HA doubles compute cost. 
availabilityType: "REGIONAL" = HA, "ZONAL" = no HA. - ha_enabled = settings.get("availabilityType") == "REGIONAL" + # spec 8.9–8.10 / 9.6–9.7: query documented active_connections metric + # PermissionError propagates (spec 9.13.2) + active_connections_max = _query_active_connections( + monitoring_client, + project_id, + instance_name, + region, + window_start, + window_end, + ) + + if active_connections_max is None: + continue # unresolved coverage → skip (spec 8.9) - # Storage size and type — billed separately from compute. - # Cloud SQL pricing: PD_SSD ~$0.17/GB/month, PD_HDD ~$0.09/GB/month. + if active_connections_max > 0: + continue # active → skip (spec 8.10) + + # --- All exclusions passed: build finding --- + settings = instance.get("settings") or {} + database_version = instance.get("databaseVersion", "") + tier = settings.get("tier", "") + availability_type = settings.get("availabilityType", "") + ha_enabled = availability_type == "REGIONAL" data_disk_size_gb = settings.get("dataDiskSizeGb") data_disk_type = settings.get("dataDiskType", "") - - # Backup retention — additional storage cost for retained backups. backup_cfg = settings.get("backupConfiguration") or {} backup_retention = (backup_cfg.get("backupRetentionSettings") or {}).get("retainedBackups") + labels = settings.get("userLabels") or {} - # Parse CPU and memory from custom tier names (format: db-custom-{cpu}-{memory_mb}). 
- cpu_count: Optional[int] = None - memory_gb: Optional[float] = None - if tier.startswith("db-custom-"): - parts = tier.split("-") - if len(parts) == 4: - try: - cpu_count = int(parts[2]) - memory_gb = round(int(parts[3]) / 1024, 1) - except ValueError: - pass - + # spec 10.2: signals_used must disclose state, type, metric coverage, connections, + # version, tier, HA context, and storage/backup context when present signals_used = [ "Instance state: RUNNABLE", - f"Zero TCP connections observed via Cloud Monitoring over " - f"{idle_days} days " - f"(metric: cloudsql.googleapis.com/database/network/connections; " - f"may not capture short-lived or non-TCP workloads)", - f"Database version: {database_version}", - cost_signal, + "Instance type: CLOUD_SQL_INSTANCE (primary)", + ( + f"Metric coverage: FULL for {idle_days}-day window " + f"(cloudsql.googleapis.com/database/active_connections " + f"on cloudsql_database)" + ), + f"active_connections_max = {active_connections_max:.0f} over {idle_days}-day window", + f"Database version: {database_version or 'unknown'}", + f"Tier: {tier or 'unknown'}", ] if ha_enabled: signals_used.append( - "HA enabled (availabilityType: REGIONAL) — actual compute cost is ~2x the estimate" + "HA enabled (availabilityType: REGIONAL) — regional instance " + "with primary and standby" ) - if data_disk_size_gb: + if data_disk_size_gb is not None: signals_used.append( - f"Storage: {data_disk_size_gb} GB ({data_disk_type or 'unknown type'}) — " + f"Storage: {data_disk_size_gb} GB " + f"({data_disk_type or 'unknown type'}) — " f"billed separately from compute" ) + if backup_retention is not None: + signals_used.append(f"Backup retention: {backup_retention} retained backups") - details = { + # spec 10.3: required details fields + details: dict = { "instance_name": instance_name, + "instance_type": instance_type, "database_version": database_version, "tier": tier, "region": region, + "created_at": created_at.isoformat(), + "idle_days_threshold": 
idle_days, + "metric_coverage": "FULL", + "active_connections_max": active_connections_max, "ha_enabled": ha_enabled, - "days_idle_threshold": idle_days, - "estimated_monthly_cost_usd": monthly_cost, + "availability_type": availability_type or None, "labels": labels, } + # conditional details (spec 10.3: when present) + if master_instance_name: + details["master_instance_name"] = master_instance_name if data_disk_size_gb is not None: details["data_disk_size_gb"] = data_disk_size_gb if data_disk_type: details["data_disk_type"] = data_disk_type if backup_retention is not None: details["backup_retained_count"] = backup_retention + + # Custom tier CPU/memory parsing — context only + cpu_count: Optional[int] = None + memory_gb: Optional[float] = None + if tier.startswith("db-custom-"): + parts = tier.split("-") + if len(parts) == 4: + try: + cpu_count = int(parts[2]) + memory_gb = round(int(parts[3]) / 1024, 1) + except ValueError: + pass if cpu_count is not None: details["cpu_count"] = cpu_count details["memory_gb"] = memory_gb @@ -268,30 +405,34 @@ def find_idle_sql_instances( region=region, title=f"Idle Cloud SQL Instance ({idle_days}+ Days)", summary=( - f"Cloud SQL instance '{instance_name}' ({database_version}, {tier}) " - f"in region '{region}' has had no observed database connections via " - f"Cloud Monitoring over {idle_days}+ days but continues to incur " - f"compute charges." + f"Cloud SQL instance '{instance_name}' " + f"({database_version or 'unknown'}, {tier or 'unknown'}) " + f"in region '{region}' shows no observed active connections over " + f"{idle_days}+ days." 
+ ), + reason=( + f"active_connections_max == 0 over the {idle_days}-day observation window " + f"(cloudsql.googleapis.com/database/active_connections)" ), - reason=f"Zero database connections detected over the last {idle_days} days", risk=RiskLevel.HIGH, - confidence=confidence, + confidence=ConfidenceLevel.HIGH, detected_at=now, evidence=Evidence( signals_used=signals_used, signals_not_checked=[ - f"Short-lived or batch connections (cron jobs, ETL) not visible in Cloud Monitoring connection metrics over the {idle_days}-day window", - "Non-TCP workloads or Unix socket connections via Cloud SQL Proxy", - "Scheduled maintenance window", - "Planned reactivation for upcoming sprint", - "Read replicas (excluded from this rule)", - "Storage, backups, HA configuration, and network egress not included " - "in cost estimate — actual cost is often 2–5x higher", + "Short-lived workload bursts between metric samples were not evaluated", + "Business or application retention intent", + "Migration, failback, or future reactivation intent", + "Storage, backup, and network savings were not estimated", + "Engine-specific internal work not represented by active client " + "connections alone", ], time_window=f"{idle_days} days", ), details=details, - estimated_monthly_cost_usd=monthly_cost, + # spec 9.10: always None — pricing varies by edition, region, compute shape, + # HA, storage, and commitment model; no flat estimate is appropriate + estimated_monthly_cost_usd=None, ) ) diff --git a/cleancloud/providers/gcp/rules/vm_stopped.py b/cleancloud/providers/gcp/rules/vm_stopped.py index 7935507..4426244 100644 --- a/cleancloud/providers/gcp/rules/vm_stopped.py +++ b/cleancloud/providers/gcp/rules/vm_stopped.py @@ -1,5 +1,42 @@ -import re -from datetime import datetime, timedelta, timezone +""" +Rule: gcp.compute.vm.stopped + + (spec — docs/specs/gcp/vm_stopped.md) + +Intent: + Detect Compute Engine VM instances in the documented stopped lifecycle state + that have remained stopped for 
at least the configured threshold and therefore + represent conservative review candidates for cleanup of lingering + attached-cost surfaces. + + This is a conservative review-candidate rule only. It is not proof that + the VM is abandoned, not proof that attached resources should be deleted, + and not proof of a specific monthly saving. + +Exclusions: + - instance record malformed or name absent / empty (spec 8.1) + - aggregated scope key does not resolve to exact zones/ZONE (spec 8.2) + - region filter set and normalized region is unknown or does not match (spec 8.3) + - instance is proven to have active MIG membership (spec 8.4) + - normalized lifecycle state not STOPPED_VM (spec 8.5) + - lastStopTimestamp absent or unparsable (spec 8.6) + - stop age < max_age_days (spec 8.7) + +Detection: + - normalized status is STOPPED_VM (TERMINATED or STOPPED) + - lastStopTimestamp parsable and stop_age_days >= max_age_days + +Cost model (spec 9.6): + estimated_monthly_cost_usd = None + Attached resources continue billing by their own pricing surface; + no flat rate estimate is appropriate. + +APIs: + - compute.googleapis.com: instances.aggregatedList with returnPartialSuccess=true +""" + +import warnings +from datetime import datetime, timezone from typing import List, Optional from google.api_core.exceptions import Forbidden, NotFound, PermissionDenied @@ -10,22 +47,92 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -# Persistent disk storage cost for stopped VMs — conservative pd-standard rate. -# vCPU and RAM do not bill when TERMINATED; only attached disks continue to charge. 
-_DISK_COST_PER_GB_MONTH = 0.04 # pd-standard, us-central1 +# spec 2.1: canonical stopped lifecycle states +_STOPPED_STATUSES = frozenset({"TERMINATED", "STOPPED"}) + + +def _whole_utc_days_since(ts: datetime, now: datetime) -> int: + """Return the number of whole UTC calendar days between ts and now.""" + return (now.astimezone(timezone.utc).date() - ts.astimezone(timezone.utc).date()).days def _parse_gcp_timestamp(ts: str) -> Optional[datetime]: - """Parse a GCP RFC3339 timestamp like '2024-01-15T10:30:00.000-07:00' or '...Z'.""" + """Parse a GCP RFC3339 timestamp to a UTC-aware datetime, or return None.""" if not ts: return None try: - # Strip fractional seconds for uniform parsing across Python 3.10+ - cleaned = re.sub(r"\.\d+", "", ts) - cleaned = cleaned.replace("Z", "+00:00") - return datetime.strptime(cleaned, "%Y-%m-%dT%H:%M:%S%z") - except Exception: + return datetime.fromisoformat(ts.replace("Z", "+00:00")) + except ValueError: + return None + + +def _extract_zone(zone_scope: str) -> Optional[str]: + """ + Return zone name from an exact 'zones/ZONE' aggregated scope key. + + Returns None for any other scope form, including keys with extra path + segments such as 'zones/us-central1-a/extra' (spec 9.2.1). + """ + if not zone_scope.startswith("zones/"): return None + zone = zone_scope[len("zones/") :] + # Reject empty suffix or any additional path segments + if not zone or "/" in zone: + return None + return zone + + +def _derive_region(zone: str) -> str: + """ + Derive region from a zone string by dropping the trailing zone letter. + + Returns 'unknown' when the zone string is not parseable as a standard + GCP zone (spec 9.2.4). Standard form: '{area}-{sub}-{letter}'. + """ + parts = zone.rsplit("-", 1) + if len(parts) == 2 and "-" in parts[0]: + return parts[0] + return "unknown" + + +def _is_mig_member(instance) -> bool: + """ + Return True only when first-party proof of active MIG membership is available. 
+ + Spec 9.4.3 allows two proof-source categories: + + a) Direct managed-instance-group membership surfaces — e.g., the result of + calling instanceGroupManagers.listManagedInstances for each MIG and + checking whether the instance self-link appears. Doing so requires + additional API calls that are out of scope for a rule using only + instances.aggregatedList; this path is not exercised here. + + b) Current instance metadata — the 'created-by' key set by GCP at + instance creation time referencing 'instanceGroupManagers/...'. + This is the only first-party proof available from the aggregated + list response and is checked below. + + No name patterns, user labels, or other weak heuristics are used (spec 9.4.4). + """ + # Proof source b: GCP-set 'created-by' instance metadata + metadata = getattr(instance, "metadata", None) + if not metadata: + return False + for item in getattr(metadata, "items", None) or []: + if getattr(item, "key", None) == "created-by": + val = getattr(item, "value", "") or "" + if "instanceGroupManagers/" in val: + return True + return False + + +def _has_external_nat_ip(instance) -> bool: + """True when any network interface has a NAT IP (spec 7 / 10.2.9).""" + for nic in getattr(instance, "network_interfaces", None) or []: + for ac in getattr(nic, "access_configs", None) or []: + if getattr(ac, "nat_ip", None): + return True + return False def find_stopped_vms( @@ -36,168 +143,236 @@ def find_stopped_vms( max_age_days: int = 30, ) -> List[Finding]: """ - Find Compute Engine VMs in TERMINATED state for 30+ days. + Find Compute Engine VMs in STOPPED_VM state for max_age_days+ days. - GCE VMs in TERMINATED status stop billing for vCPU and RAM, but attached - persistent disks continue to incur storage charges. Long-running TERMINATED - instances are a reliable signal of abandoned dev/staging environments or - forgotten manual shutdowns. 
- - Detection logic: - - Instance status == TERMINATED - - lastStopTimestamp is older than `max_age_days` days - - Cost estimated from sum of attached disk sizes (pd-standard rate) + Detection requires lastStopTimestamp to be present and parseable. + Instances with no usable stop timestamp are skipped rather than guessed. + Instances with proven active MIG membership are excluded. IAM permissions required: - - compute.instances.list (included in roles/compute.viewer) + - compute.instances.list (roles/compute.viewer) """ findings: List[Finding] = [] now = datetime.now(timezone.utc) - cutoff = now - timedelta(days=max_age_days) instances_client = compute_v1.InstancesClient(credentials=credentials) - # aggregated_list() returns a lazy pager — PermissionDenied fires during - # iteration (not at call time), so the try/except must wrap the full loop. + # spec 9.1: aggregated inventory with returnPartialSuccess — PermissionDenied + # and NotFound fire during iteration, so the try/except wraps the full loop. 
try: - for zone_scope, zone_instances in instances_client.aggregated_list(project=project_id): - if not zone_instances.instances: - continue + for zone_scope, zone_instances in instances_client.aggregated_list( + request={"project": project_id, "return_partial_success": True}, + ): + # spec 9.1.4: surface partial-coverage warnings + _warn = getattr(zone_instances, "warning", None) + if _warn and getattr(_warn, "code", None): + warnings.warn( + f"gcp.compute.vm.stopped: aggregated inventory returned partial " + f"coverage for scope '{zone_scope}' (code: {_warn.code}) — " + f"findings from this scope may be incomplete", + UserWarning, + stacklevel=2, + ) - zone_name = zone_scope.split("/")[-1] - region = zone_name.rsplit("-", 1)[0] # "us-central1-a" -> "us-central1" + if not getattr(zone_instances, "instances", None): + continue - if region_filter and region != region_filter: + # spec 9.2.1: accept only exact zones/ZONE scope keys + zone_name = _extract_zone(zone_scope) + if zone_name is None: continue - for instance in zone_instances.instances: - if instance.status != "TERMINATED": - continue + # spec 9.2.3–9.2.4: derive region; 'unknown' when not parseable + region = _derive_region(zone_name) - stop_time = _parse_gcp_timestamp(instance.last_stop_timestamp or "") - - if stop_time is None: - # Cannot determine stop time — flag at MEDIUM confidence - confidence = ConfidenceLevel.MEDIUM - days_stopped_actual = None - stop_time_str = "unknown" - else: - if stop_time > cutoff: - continue # Stopped recently — below threshold - days_stopped_actual = (now - stop_time).days - stop_time_str = stop_time.isoformat() - # 90+ days stopped is a strong abandonment signal; - # 30–89 days may still be a deliberate seasonal or sprint shutdown. 
- confidence = ( - ConfidenceLevel.HIGH - if days_stopped_actual >= 90 - else ConfidenceLevel.MEDIUM + # spec 9.2.6: region filter with unknown region → skip (with warning) + if region_filter: + if region == "unknown": + warnings.warn( + f"gcp.compute.vm.stopped: skipped zone scope '{zone_name}' " + f"because region could not be derived " + f"(region_filter={region_filter!r})", + UserWarning, + stacklevel=2, ) + continue + if region != region_filter: + continue + + for instance in zone_instances.instances: + try: + # spec 8.1: name must be present and non-empty + if not getattr(instance, "name", ""): + continue - # Sum attached persistent disk sizes for cost estimate - disks = instance.disks or [] - persistent_disks = [d for d in disks if d.type_ == "PERSISTENT"] - total_disk_gb = sum(int(d.disk_size_gb or 0) for d in persistent_disks) - monthly_cost = round(total_disk_gb * _DISK_COST_PER_GB_MONTH, 2) + # spec 8.4: skip proven MIG members + if _is_mig_member(instance): + continue - # Boot disk presence is the strongest signal of an abandoned environment - boot_disk_count = sum(1 for d in disks if getattr(d, "boot", False)) + # spec 8.5: only STOPPED_VM lifecycle states are eligible + raw_status = instance.status or "" + if raw_status not in _STOPPED_STATUSES: + continue - labels = dict(instance.labels) if instance.labels else {} - machine_type_url = instance.machine_type or "" - machine_type = machine_type_url.split("/")[-1] if machine_type_url else "unknown" + # spec 8.6 / 9.5: lastStopTimestamp must be present and parseable + stop_time = _parse_gcp_timestamp(instance.last_stop_timestamp or "") + if stop_time is None: + continue # skip rather than guess (spec 9.5.4) - # scheduling.automaticRestart=False -> VM was configured to not restart on - # failure (preemptible-style or intentional); mild signal of deliberate shutdown. 
- scheduling = instance.scheduling - automatic_restart = ( - getattr(scheduling, "automatic_restart", True) if scheduling else True - ) + # spec 8.7 / 9.5.3: stop age must meet the threshold + stop_age_days = _whole_utc_days_since(stop_time, now) + if stop_age_days < max_age_days: + continue - last_start_ts = instance.last_start_timestamp or None - - signals = [ - "Instance status: TERMINATED", - f"Attached disks: {len(persistent_disks)} persistent disk(s), {total_disk_gb} GB total", - f"Estimated disk cost: ~${monthly_cost}/month (pd-standard rate — see caveats)", - ] - if days_stopped_actual is not None: - signals.insert( - 1, - f"Stopped for {days_stopped_actual} days (since {stop_time_str})", + # --- All exclusions passed: build finding --- + + # Disk analysis (spec 7) + disks = list(getattr(instance, "disks", None) or []) + persistent_disks = [d for d in disks if getattr(d, "type_", "") == "PERSISTENT"] + persistent_disk_count = len(persistent_disks) + persistent_disk_total_gb = sum( + max(0, int(getattr(d, "disk_size_gb", 0) or 0)) for d in persistent_disks ) - else: - signals.insert(1, "Stop timestamp unavailable — confidence reduced to MEDIUM") - if boot_disk_count > 0: - signals.append( - f"Boot disk present ({boot_disk_count} boot disk(s)) — " - f"strong indicator of an abandoned environment" + boot_disk_count = sum(1 for d in disks if getattr(d, "boot", False)) + disk_kinds_present = sorted( + {getattr(d, "type_", "") for d in disks if getattr(d, "type_", "")} ) - if days_stopped_actual is not None: - duration_desc = f"has been TERMINATED for {days_stopped_actual} days" - else: - duration_desc = "is TERMINATED (duration unknown)" - - details = { - "instance_name": instance.name, - "machine_type": machine_type, - "zone": zone_name, - "total_disk_gb": total_disk_gb, - "boot_disk_count": boot_disk_count, - "days_stopped_threshold": max_age_days, - "stop_time": stop_time_str, - "automatic_restart": automatic_restart, - "labels": labels, - } - if 
days_stopped_actual is not None: - details["days_stopped"] = days_stopped_actual - if last_start_ts: - details["last_start_timestamp"] = last_start_ts - - findings.append( - Finding( - provider="gcp", - rule_id="gcp.compute.vm.stopped", - resource_type="gcp.compute.instance", - resource_id=f"projects/{project_id}/zones/{zone_name}/instances/{instance.name}", - region=region, - title=( - f"Stopped VM ({days_stopped_actual} Days)" - if days_stopped_actual is not None - else "Stopped VM (Duration Unknown)" - ), - summary=( - f"VM '{instance.name}' ({machine_type}) in zone '{zone_name}' " - f"{duration_desc}. " - f"Attached disks ({len(persistent_disks)} disk(s), {total_disk_gb} GB) " - f"continue billing at ~${monthly_cost}/month." - ), - reason=( - f"VM has been in TERMINATED state for {days_stopped_actual} days" - if days_stopped_actual is not None - else "VM is in TERMINATED state (stop timestamp unavailable)" - ), - risk=RiskLevel.MEDIUM, - confidence=confidence, - detected_at=now, - evidence=Evidence( - signals_used=signals, - signals_not_checked=[ - "Planned seasonal or scheduled shutdown", - "IaC-managed environment pending recreation", - "Data preserved intentionally for forensics", - "Disk types (pd-ssd, pd-balanced, hyperdisk) may have higher " - "costs — estimate uses pd-standard baseline ($0.04/GB/month)", - "Regional disks (replicated across zones) incur higher storage " - "cost than the pd-standard estimate", - ], - time_window=f"{max_age_days} days", + # Machine type (spec 7) + machine_type_raw = instance.machine_type or "" + machine_type = ( + machine_type_raw.split("/")[-1] if machine_type_raw else "unknown" + ) + + # Network and GPU context (spec 7) + external_nat_ip_present = _has_external_nat_ip(instance) + gpu_attached = bool(getattr(instance, "guest_accelerators", None)) + + # Scheduling context (spec 7) + scheduling = getattr(instance, "scheduling", None) + automatic_restart = ( + getattr(scheduling, "automatic_restart", None) if scheduling else 
None + ) + + # Labels and timestamps + labels = dict(instance.labels) if instance.labels else {} + last_stop_timestamp_str = stop_time.isoformat() + last_start_ts = instance.last_start_timestamp or "" + + # spec 9.7: confidence is age-led + confidence = ( + ConfidenceLevel.HIGH if stop_age_days >= 90 else ConfidenceLevel.MEDIUM + ) + + # spec 10.2: signals_used + signals_used = [ + f"Instance lifecycle state: {raw_status} (STOPPED_VM)", + f"Stopped for {stop_age_days} days (threshold: {max_age_days} days)", + ( + f"Persistent disks: {persistent_disk_count} disk(s), " + f"{persistent_disk_total_gb} GB total — " + f"attached resources continue billing" ), - details=details, - estimated_monthly_cost_usd=(monthly_cost if monthly_cost > 0 else None), + f"Machine type: {machine_type}", + ] + if boot_disk_count > 0: + signals_used.append(f"Boot disk present: {boot_disk_count} boot disk(s)") + if disk_kinds_present: + signals_used.append(f"Attached disk kinds: {', '.join(disk_kinds_present)}") + if external_nat_ip_present: + signals_used.append( + "External NAT IP present — may indicate active connectivity dependency" + ) + if gpu_attached: + signals_used.append("GPU attached — higher-cost resource context") + if automatic_restart is not None: + signals_used.append(f"automaticRestart: {automatic_restart}") + + # spec 10.3: required details + details: dict = { + "instance_name": instance.name, + "machine_type": machine_type, + "zone": zone_name, + "raw_status": raw_status, + "stop_age_days": stop_age_days, + "max_age_days_threshold": max_age_days, + "last_stop_timestamp": last_stop_timestamp_str, + "mig_membership": False, # proven non-MIG (MIG members were excluded) + "persistent_disk_count": persistent_disk_count, + "persistent_disk_total_gb": persistent_disk_total_gb, + "disk_kinds_present": disk_kinds_present, + "boot_disk_count": boot_disk_count, + "external_nat_ip_present": external_nat_ip_present, + "gpu_attached": gpu_attached, + "labels": labels, + } + # 
conditional details (spec 10.3: when present) + if last_start_ts: + details["last_start_timestamp"] = last_start_ts + if automatic_restart is not None: + details["automatic_restart"] = automatic_restart + + # spec 10.2: signals_not_checked — region_unparseable is a + # diagnostic code only relevant when region derivation failed + # for this finding (spec 9.2 / 10.2). + signals_not_checked = [ + "Planned seasonal or scheduled shutdown intent", + "Rollback, forensics, or future restart intent", + "Exact resource-specific monthly pricing for disks and IPs " + "was not estimated", + "Static external IP usage and billing state were not fully " "resolved", + "missing_last_stop_timestamp: older or atypical VMs with no " + "usable stop timestamp are intentionally skipped", + ] + if region == "unknown": + signals_not_checked.append( + "region_unparseable: region could not be derived from zone — " + "regional context is unavailable for this instance" + ) + + findings.append( + Finding( + provider="gcp", + rule_id="gcp.compute.vm.stopped", + resource_type="gcp.compute.instance", + resource_id=( + f"projects/{project_id}/zones/{zone_name}" + f"/instances/{instance.name}" + ), + region=region, + title=f"Stopped VM ({stop_age_days}+ Days)", + summary=( + f"VM '{instance.name}' ({machine_type}) in zone '{zone_name}' " + f"has been stopped for {stop_age_days} days. " + f"Attached disks ({persistent_disk_count} disk(s), " + f"{persistent_disk_total_gb} GB) continue billing." 
+ ), + reason=( + f"Instance has been in {raw_status} state for {stop_age_days} days " + f"(>= {max_age_days}-day threshold)" + ), + risk=RiskLevel.MEDIUM, + confidence=confidence, + detected_at=now, + evidence=Evidence( + signals_used=signals_used, + signals_not_checked=signals_not_checked, + time_window=f"{max_age_days} days", + ), + details=details, + # spec 9.6: always None — attached resources bill by their own pricing + estimated_monthly_cost_usd=None, + ) ) - ) + except (AttributeError, TypeError, ValueError) as e: + # spec 9.9.3: malformed instance records are skipped item-by-item + warnings.warn( + f"gcp.compute.vm.stopped: skipped malformed instance " + f"{getattr(instance, 'name', '')}: {e}", + UserWarning, + stacklevel=2, + ) + continue except (PermissionDenied, Forbidden) as e: raise PermissionError( diff --git a/cleancloud/providers/gcp/scan.py b/cleancloud/providers/gcp/scan.py index fa4420a..5f2f44e 100644 --- a/cleancloud/providers/gcp/scan.py +++ b/cleancloud/providers/gcp/scan.py @@ -15,20 +15,20 @@ from cleancloud.core.finding import Finding from cleancloud.output.progress import advance -from cleancloud.providers.gcp.rules.disk_unattached import find_unattached_disks -from cleancloud.providers.gcp.rules.featurestore_idle import find_idle_featurestores -from cleancloud.providers.gcp.rules.ip_unused import find_unused_static_ips -from cleancloud.providers.gcp.rules.snapshot_old import find_old_snapshots -from cleancloud.providers.gcp.rules.sql_instance_idle import find_idle_sql_instances -from cleancloud.providers.gcp.rules.tpu_idle import find_idle_tpu_nodes -from cleancloud.providers.gcp.rules.vertex_endpoint_idle import ( +from cleancloud.providers.gcp.rules.ai.featurestore_idle import find_idle_featurestores +from cleancloud.providers.gcp.rules.ai.tpu_idle import find_idle_tpu_nodes +from cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle import ( find_idle_vertex_endpoints, ) -from 
cleancloud.providers.gcp.rules.vertex_training_job_long_running import ( +from cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running import ( find_long_running_vertex_training_jobs, ) +from cleancloud.providers.gcp.rules.ai.workbench_idle import find_idle_workbench_instances +from cleancloud.providers.gcp.rules.disk_unattached import find_unattached_disks +from cleancloud.providers.gcp.rules.ip_unused import find_unused_static_ips +from cleancloud.providers.gcp.rules.snapshot_old import find_old_snapshots +from cleancloud.providers.gcp.rules.sql_instance_idle import find_idle_sql_instances from cleancloud.providers.gcp.rules.vm_stopped import find_stopped_vms -from cleancloud.providers.gcp.rules.workbench_idle import find_idle_workbench_instances from cleancloud.providers.gcp.session import create_gcp_session from cleancloud.providers.gcp.validate import ( validate_project_params, diff --git a/docs/rules/gcp.md b/docs/rules/gcp.md index c7b5015..be5dd89 100644 --- a/docs/rules/gcp.md +++ b/docs/rules/gcp.md @@ -6,7 +6,7 @@ | Rule ID | Cost Surface | What It Detects | |---|---|---| -| `gcp.compute.vm.stopped` | Compute | TERMINATED VMs stopped 30+ days (disk charges continue) | +| `gcp.compute.vm.stopped` | Compute | TERMINATED or STOPPED VMs for 30+ days (attached disk charges continue) | | `gcp.compute.disk.unattached` | Storage | Persistent Disks in READY state with no attached VM | | `gcp.compute.snapshot.old` | Storage | Disk snapshots older than 90 days | | `gcp.compute.ip.unused` | Network | Reserved static IPs in RESERVED state | @@ -22,81 +22,129 @@ ## Compute #### `gcp.compute.vm.stopped` -**Detects:** TERMINATED VM instances stopped 30+ days; persistent disk charges continue +**Detects:** Compute Engine VM instances in `TERMINATED` or `STOPPED` lifecycle state for `max_age_days`+ days; attached disk charges continue regardless of instance state -**Confidence / Risk:** HIGH (`lastStopTimestamp` ≥ 30 days ago); MEDIUM (TERMINATED but timestamp 
absent) / MEDIUM +**Confidence / Risk:** +- `stop_age_days >= 90`: HIGH +- `max_age_days <= stop_age_days < 90`: MEDIUM + +**Cost:** `estimated_monthly_cost_usd = None` — attached resources (disks, static IPs) bill by their own pricing surface; no flat estimate is appropriate **Permissions:** `compute.instances.list` (roles/compute.viewer) -**Params:** none (30-day threshold is fixed) +**Params:** `max_age_days` (default: 30) -**Exclusions:** instances not in TERMINATED state; stopped < 30 days +**Exclusions:** +- instance record malformed or `name` absent/empty +- aggregated scope key not in exact `zones/ZONE` form +- region filter set and region is unknown or does not match +- instance has proven active MIG membership (`created-by` metadata referencing `instanceGroupManagers/...`) +- lifecycle state not `TERMINATED` or `STOPPED` +- `lastStopTimestamp` absent or unparsable +- stop age < `max_age_days` -**Spec:** — +**Spec:** [docs/specs/gcp/vm_stopped.md](../specs/gcp/vm_stopped.md) --- ## Storage #### `gcp.compute.disk.unattached` -**Detects:** Persistent Disks in `READY` state with `users == []` +**Detects:** Persistent Disks in `READY` state with an explicitly empty `users[]` (no attached VM); covers both zonal and regional disks via aggregated inventory + +**Confidence / Risk:** +- Zonal, last detach ≥ 7 days ago (or never detached): HIGH +- Zonal, last detach 24 h – 7 days ago: MEDIUM — may still be mid-deletion pipeline +- Either scope, last detach < 24 h ago: LOW — very likely mid-pipeline +- Regional, unattached (any age): MEDIUM — regional disks are documented HA/failover infrastructure -**Confidence / Risk:** HIGH (unambiguous detachment) / LOW +**Cost:** `estimated_monthly_cost_usd = None` — GCP disk pricing varies by type, region, currency, and provisioned performance; the finding surfaces disk type and size only **Permissions:** `compute.disks.list` (roles/compute.viewer) -**Params:** none +**Params:** none (no user-configurable threshold — detection 
is based on current attachment state) -**Exclusions:** none +**Exclusions:** disk record malformed or name absent; aggregated scope key unresolvable (e.g. `global`); disk `status` not exactly `READY`; `users` field absent or not an explicit empty list; any non-empty `users` entry (attached disk) -**Spec:** — +**Spec:** [docs/specs/gcp/disk_unattached.md](../specs/gcp/disk_unattached.md) #### `gcp.compute.snapshot.old` -**Detects:** Disk snapshots older than `days_old`; confidence reflects whether source disk still exists +**Detects:** Standard disk snapshots older than `max_age_days` that are not part of an automated backup workflow + +**Confidence / Risk:** LOW (age alone is not proof of waste; incremental chain sharing means deletion may not reclaim billed storage proportionally) / LOW -**Confidence / Risk:** HIGH (source disk no longer exists — orphaned); MEDIUM (source disk still exists) / LOW +**Cost:** `estimated_monthly_cost_usd = None` — snapshot pricing varies by type (standard vs archive), storage location, and region; no flat per-GB rate is hardcoded **Permissions:** `compute.snapshots.list` (roles/compute.viewer) -**Params:** `days_old` (default: 90) +**Params:** `max_age_days` (default: 90) -**Exclusions:** snapshots not in `READY` status; younger than threshold; `region_filter` is ignored (snapshots are global) +**Exclusions:** +- snapshot record malformed or `name` absent/empty +- `status` not exactly `READY` (skips `CREATING`, `DELETING`, `FAILED`, `UPLOADING`) +- `creationTimestamp` absent or unparsable +- age < `max_age_days` +- `snapshotType == "ARCHIVE"` (low-cost long-retention class — out of scope) +- `sourceSnapshotSchedulePolicy` or `sourceSnapshotSchedulePolicyId` non-empty (schedule-created backup) +- `autoCreated == true` (auto-created backup) +- `region_filter` is ignored (snapshots are global resources) -**Spec:** — +**Spec:** [docs/specs/gcp/snapshot_old.md](../specs/gcp/snapshot_old.md) --- ## Network #### `gcp.compute.ip.unused` 
-**Detects:** Reserved static IPs (regional and global) in `RESERVED` state (GCP confirms not attached)
+**Detects:** Regional and global static external IPv4 address reservations in `RESERVED` state — allocated but not attached to any resource

-**Confidence / Risk:** HIGH (GCP confirms RESERVED state) / LOW
+**Confidence / Risk:** HIGH (`RESERVED` state is canonical GCP control-plane confirmation of non-attachment) / LOW

-**Permissions:** `compute.addresses.list`, `compute.globalAddresses.list` (roles/compute.viewer); gracefully degrades if globalAddresses permission denied
+**Cost:** `estimated_monthly_cost_usd = 7.30` — derived from Google's documented **$0.01/hour** unused static external IPv4 rate × 730-hour normalized month; actual billing remains hourly and may vary by contract or currency

-**Params:** none
+**Permissions:** `compute.addresses.list`, `compute.globalAddresses.list` (both included in roles/compute.viewer); permission failures on either surface are raised as a permission error

-**Exclusions:** IPs in `IN_USE` status; global IPs skipped if `region_filter` is set
+**Params:** none — detection is based on current control-plane state, not age

-**Spec:** —
+**Exclusions:**
+- address record malformed or `name` absent/empty
+- regional aggregated scope key not exactly `regions/REGION`
+- `status` not exactly `RESERVED` (skips `IN_USE`, `RESERVING`, unknown)
+- `addressType` not exactly `EXTERNAL` (internal addresses are not billed this way)
+- `ipVersion` not exactly `IPV4` (IPv6 addresses are out of scope)
+- `purpose == "NAT_AUTO"` (Cloud NAT automatic allocations)
+- `users[]` non-empty (contradictory current-use evidence)
+- global addresses skipped when `region_filter` is active
+
+**Spec:** [docs/specs/gcp/ip_unused.md](../specs/gcp/ip_unused.md)

---

## Platform

#### `gcp.sql.instance.idle`

-**Detects:** Cloud SQL instances with zero connections for `idle_days`; if Monitoring unavailable, instance is assumed active (conservative fallback — not
flagged) +**Detects:** Primary Cloud SQL instances (`CLOUD_SQL_INSTANCE`) in `RUNNABLE` state with zero observed active connections over the full `idle_days` window; metric coverage must be confirmed full (partial or sparse coverage skips rather than emits) **Confidence / Risk:** HIGH (Cloud Monitoring confirms zero connections for full window) / HIGH +**Cost:** `estimated_monthly_cost_usd = None` — pricing varies by edition, region, compute shape, HA, storage, and commitment model; no flat estimate is appropriate + **Permissions:** `cloudsql.instances.list` (roles/cloudsql.viewer), `monitoring.timeSeries.list` (roles/monitoring.viewer) **Params:** `idle_days` (default: 14) -**Exclusions:** read replicas; instances not in `RUNNABLE` state - -**Spec:** — +**Exclusions:** +- instance record malformed or `name` absent/empty +- `region` absent/empty +- region filter set and region does not exactly match +- `state` not exactly `RUNNABLE` +- `instanceType` not exactly `CLOUD_SQL_INSTANCE` +- `masterInstanceName` present and non-empty (replica-shaped instance) +- `createTime` absent, unparsable, or instance newer than `window_start` (full window not coverable) +- active-connections metric coverage unresolved (no series, no points, partial window, large gap > 10 min, timestamp parse failure, or query failure) +- `active_connections_max > 0` anywhere in the full window + +**Spec:** [docs/specs/gcp/sql_instance_idle.md](../specs/gcp/sql_instance_idle.md) --- diff --git a/docs/specs/gcp/disk_unattached.md b/docs/specs/gcp/disk_unattached.md new file mode 100644 index 0000000..1718381 --- /dev/null +++ b/docs/specs/gcp/disk_unattached.md @@ -0,0 +1,364 @@ +# GCP Rule Spec - `gcp.compute.disk.unattached` + +## 1. Rule Identity + +- **Rule ID:** `gcp.compute.disk.unattached` +- **Provider:** GCP +- **Resource type:** Compute Engine persistent disk +- **Finding resource_type:** `gcp.compute.disk` + +--- + +## 2. 
Intent + +Detect **Compute Engine persistent disks that are currently unattached to any VM and still bill for storage** so they can be reviewed as conservative cleanup candidates. + +This rule is deliberately **precision-first**. It is **not** proof that deleting a disk is safe, **not** proof that no failover / restore workflow exists, and **not** proof of a specific monthly saving. It is a conservative review-candidate rule for disks that are present, billable, and currently unattached. + +--- + +## 3. GCP Documentation Grounding + +### 3.1 Persistent disks are independent resources that continue to exist outside VM lifecycle + +Google documents persistent disks as Compute Engine storage resources used by VM instances, with separate zonal and regional disk resources. + +Sources: + +- *Resource: Disk* +- *Resource: regionDisks* + +URLs: + +- https://cloud.google.com/compute/docs/reference/rest/v1/disks +- https://cloud.google.com/compute/docs/reference/rest/v1/regionDisks + +Rule consequence: + +1. Unattached persistent disks are a valid hygiene review surface. +2. The rule should operate from disk control-plane state, not VM guest inspection. + +### 3.2 Canonical attachment signal is the disk `users[]` field + +Google documents for both zonal and regional disk resources: + +1. `users[]` is an output-only list of attached-instance links +2. `lastAttachTimestamp` is the last attach timestamp +3. `lastDetachTimestamp` is the last detach timestamp + +Sources: + +- *Resource: Disk* +- *Resource: regionDisks* + +URLs: + +- https://cloud.google.com/compute/docs/reference/rest/v1/disks +- https://cloud.google.com/compute/docs/reference/rest/v1/regionDisks + +Rule consequence: + +1. `users[]` is the canonical current-attachment surface for this rule. +2. `lastDetachTimestamp` and `lastAttachTimestamp` are contextual timing signals only. +3. A disk with any current `users[]` entry is attached and out of scope. 
+ +### 3.3 Disk creation status is documented and only `READY` is stably evaluable + +Google documents disk status values including: + +- `CREATING` +- `RESTORING` +- `FAILED` +- `READY` +- `DELETING` + +Source: + +- *Resource: Disk* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/disks + +Rule consequence: + +1. Only `status == "READY"` is eligible for emission. +2. `CREATING`, `RESTORING`, `FAILED`, and `DELETING` must skip. + +### 3.4 Aggregated disk inventory returns zonal and regional scope separately + +Google documents `disks.aggregatedList` as an aggregated list of persistent disks and describes per-scope results. The response includes disk fields for both zonal and regional disks and supports partial-success behavior. + +Source: + +- *Method: disks.aggregatedList* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/disks/aggregatedList + +Rule consequence: + +1. The rule may enumerate disks via aggregated inventory. +2. Scope keys should be interpreted conservatively as zonal or regional inventory context. +3. Unknown or unsupported scope kinds must skip. +4. Partial-success behavior exists and must be handled conservatively by implementations. + +### 3.5 Regional persistent disks are explicitly high-availability infrastructure + +Google documents regional persistent disks and Hyperdisk Balanced High Availability as synchronously replicated storage for high-availability services, failover, and lower RPO/RTO designs. + +Sources: + +- *About regional persistent disk* +- *Create and manage regional disks* + +URLs: + +- https://cloud.google.com/compute/docs/disks/about-regional-persistent-disk +- https://cloud.google.com/compute/docs/disks/regional-persistent-disk + +Rule consequence: + +1. Regional disks are still billable and can still be unattached review candidates. +2. Regional unattached disks are more operationally ambiguous than zonal disks because they are explicitly documented HA / failover infrastructure. +3. 
Confidence for unattached regional disks should therefore be more conservative than for equivalent zonal disks. + +### 3.6 Pricing varies by disk type, region, currency, and for some disks separate provisioned performance + +Google documents that disk pricing varies and points users to the Pricing Calculator, Pricing Table, Cloud Billing reports, SKUs, and Catalog API for exact pricing. The pricing page also distinguishes disk/image pricing from VM pricing and notes that Cloud Platform SKUs apply for non-USD billing. + +Source: + +- *Disk and image pricing* + +URL: + +- https://cloud.google.com/compute/disks-image-pricing + +Rule consequence: + +1. The rule must **not** hardcode a single capacity-only disk price table as authoritative. +2. The rule must **not** claim exact monthly savings from disk size and type alone. +3. `estimated_monthly_cost_usd` should remain `None` unless a future implementation uses a documented, region-aware pricing source. + +--- + +## 4. Detection Goal + +Emit only when the disk passes every rule in section **8**. Section **8** is the single source of truth for decisioning; sections **7** and **9** define normalization and evaluation contracts. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that deleting the disk is safe +- that the disk is not reserved for imminent VM recreation +- that the disk is not part of a failover, restore, or migration workflow +- that the disk is not intentionally retained as HA capacity +- that a specific monthly dollar saving exists + +--- + +## 6. Canonical Inputs + +### 6.1 Required surfaces + +| Surface | Purpose | +|---|---| +| Compute Engine persistent disk aggregated inventory | enumerate zonal and regional disks and collect current attachment, status, type, timestamps, labels, and scope | + +No VM guest metrics, Cloud Monitoring metrics, or audit-log evidence are required for this rule. 
+ +### 6.2 Authentication / permissions + +Minimum permission: + +- `compute.disks.list` + +Typical predefined role: + +- `roles/compute.viewer` + +### 6.3 Thresholds + +This rule has **no user-configurable parameter**. + +It uses documented current-attachment state plus conservative confidence shaping: + +1. **zonal disks** + 1. `LOW` when last detach is known and `< 24 hours` + 2. `MEDIUM` when last detach is known and `>= 24 hours` but `< 7 days` + 3. `HIGH` otherwise +2. **regional disks** + 1. baseline confidence is `MEDIUM` + 2. if last detach is known and `< 24 hours`, downgrade to `LOW` + 3. otherwise remain `MEDIUM` + +Reason: + +- GCP documents the timestamps and HA semantics, but it does not define an “orphaned disk” platform state. +- This rule therefore emits from current unattached state, while using recent-detach timing and regional-HA context to reduce overconfidence. + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `scope_key` | Resolve from aggregated inventory scope key. Supported forms are zonal (`zones/ZONE`) and regional (`regions/REGION`). Any other scope kind is unsupported and must skip. | +| `location` | For zonal disks, use the zone from the aggregated scope key. For regional disks, use the region from the aggregated scope key. If zonal location parsing fails, skip rather than guess. | +| `region_filter` | Compare exactly against the normalized **region** (`us-central1`, not `us-central1-a`) for both zonal and regional disks. If normalized region derivation fails, skip. | +| `status` | Resolve from documented disk `status` and compare case-sensitively to exact `"READY"`. | +| `users` | Treat as the canonical current-attachment surface. Only an explicitly empty list means currently unattached. Any non-empty entry or entries mean attached. Non-list, missing, or unresolved values are not equivalent to empty. 
| +| `disk_type` | Preserve the short terminal type name extracted from the documented disk type URL when possible; otherwise preserve unknown. | +| `size_gb` | Parse from documented `sizeGb` / SDK equivalent as a non-negative integer when possible; otherwise `0` for context only. | +| `creation_timestamp` | Preserve the raw documented RFC3339 timestamp for reviewer context only; it must not determine emission. | +| `last_detach_timestamp` | Parse as a UTC instant from documented `lastDetachTimestamp` when present. If unparsable, treat as unknown rather than failing the disk. | +| `last_attach_timestamp` | Preserve the raw documented value for reviewer context only. | +| `is_regional` | `True` when the aggregated scope is regional, otherwise `False`. | +| `labels` | `disk.labels or {}` - never `None` in output. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | disk record is malformed or `name` absent / empty | Skip | +| 8.2 | aggregated scope key is unsupported or cannot be resolved to zonal vs regional context | Skip | +| 8.3 | region filter is set and normalized disk region does not match | Skip | +| 8.4 | disk `status` is absent, unknown, or not exactly `"READY"` | Skip | +| 8.5 | disk `users` is unresolved or not reliably interpretable as current attachment state | Skip | +| 8.6 | disk `users` is non-empty | Skip | +| 8.7 | all required signals resolve and the disk is `READY` with empty `users` | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Inventory and scope contract + +Required behavior: + +1. Enumerate disks from documented Compute Engine disk inventory surfaces. +2. Treat aggregated scope keys of the form `zones/ZONE` as zonal. +3. Treat aggregated scope keys of the form `regions/REGION` as regional. +4. Skip any unexpected scope kind such as malformed or unsupported keys. +5. For zonal disks, derive comparison region from the zone name by removing the final `-` segment. +6. 
For regional disks, comparison region is the regional scope name directly. +7. If zonal scope parsing or zone-to-region derivation fails, skip rather than guessing a region. +8. If aggregated inventory returns partial-success, warnings, or otherwise incomplete scope coverage, implementations must not silently treat the result as complete project coverage. +9. Under partial aggregated coverage, item-level findings from successfully enumerated scopes may still be emitted, but zero findings must not be interpreted as a clean project. + +### 9.2 Status contract + +Required behavior: + +1. Only `status == "READY"` is eligible. +2. `CREATING`, `RESTORING`, `FAILED`, and `DELETING` must skip. +3. Unknown or unresolved status must skip. + +### 9.3 Attachment contract + +Required behavior: + +1. Use `users[]` as the sole trusted current-attachment surface for this rule. +2. A disk is currently unattached only when `users[]` resolves to an explicitly empty list. +3. A disk is attached when `users[]` contains one or more entries, including multiple entries. +4. If the attachment surface is malformed, missing in an unusable way, or cannot be resolved reliably, skip rather than assume unattached. +5. `lastAttachTimestamp` and `lastDetachTimestamp` are contextual evidence only; they must not override a non-empty `users[]`. + +### 9.4 Confidence contract + +Required behavior: + +1. Baseline confidence for a currently unattached zonal disk is `HIGH`. +2. Baseline confidence for a currently unattached regional disk is `MEDIUM`. +3. If `lastDetachTimestamp` is present and parseable: + 1. `< 24 hours` since last detach -> `LOW` + 2. `>= 24 hours` and `< 7 days` since last detach -> `MEDIUM` for zonal disks + 3. `>= 7 days` since last detach -> no downgrade for zonal disks + 4. regional disks remain capped at `MEDIUM` unless downgraded to `LOW` for recent detach +4. If `lastDetachTimestamp` is absent or unusable, keep the baseline confidence for the disk scope. 
+ +Rationale: + +Google documents current attachment (`users[]`) and detach timestamps, but it does not provide a dedicated “abandoned disk” state. This rule therefore treats current unattached state as sufficient to emit while using recency and regional-HA semantics only to modulate confidence. + +### 9.5 Cost model contract + +Required behavior: + +1. `estimated_monthly_cost_usd = None` +2. Do **not** use flat region-reference price tables such as fixed `$ / GB / month` maps. +3. Do **not** treat hyperdisk or provisioned-performance disks as capacity-only costs. +4. State only that unattached persistent disks continue to incur storage charges. + +Rationale: + +Google’s pricing documentation explicitly varies by disk type, region, currency, and pricing source, and points users to SKU-aware sources for exact prices. A static in-code table would overstate precision. + +### 9.6 Failure behavior contract + +Required behavior: + +1. Permission failures for disk inventory should surface as a permission error, not silent empty findings. +2. If the Compute Engine API for disks is unavailable / disabled for the project, returning no findings is acceptable. +3. Malformed disk records should be skipped item-by-item rather than failing the whole rule. +4. Partial aggregated inventory coverage must be surfaced as incomplete coverage or degraded scan state; it must not silently collapse into a clean no-findings outcome. + +--- + +## 10. Finding Shape + +### 10.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"gcp"` | +| `rule_id` | `"gcp.compute.disk.unattached"` | +| `resource_type` | `"gcp.compute.disk"` | +| `resource_id` | canonical project/location disk path | +| `region` | zonal location for zonal disks, regional location for regional disks | +| `confidence` | derived from section `9.4` | +| `estimated_monthly_cost_usd` | `None` | + +### 10.2 Required evidence + +`signals_used` must clearly disclose: + +1. disk `status` is `READY` +2. 
`users[]` is empty +3. whether the disk is zonal or regional +4. if present, how recently the disk was detached + +`signals_not_checked` should include remaining blind spots such as: + +1. imminent VM recreation intent +2. restore / migration / failover workflow intent +3. exact disk pricing from region-aware billing data + +### 10.3 Required details + +Details should include at least: + +- `disk_name` +- `disk_type` +- `size_gb` +- `location` +- `is_regional` +- `labels` +- `creation_timestamp` +- `last_detach_timestamp` when present +- `last_attach_timestamp` when present + +--- + +## 11. Failure Behavior + +- Permission denied on disk inventory -> raise permission error +- Compute Engine API disabled / not found for the project -> return no findings +- Malformed or unsupported scoped disk records -> skip those items diff --git a/docs/specs/gcp/ip_unused.md b/docs/specs/gcp/ip_unused.md new file mode 100644 index 0000000..8a95be5 --- /dev/null +++ b/docs/specs/gcp/ip_unused.md @@ -0,0 +1,424 @@ +# GCP Rule Spec - `gcp.compute.ip.unused` + +## 1. Rule Identity + +- **Rule ID:** `gcp.compute.ip.unused` +- **Provider:** GCP +- **Resource type:** Compute Engine address reservation +- **Finding resource_type:** `gcp.compute.address` for regional addresses, `gcp.compute.global_address` for global addresses + +--- + +## 2. Intent + +Detect **static external IPv4 address reservations that are currently in `RESERVED` state and therefore represent higher-cost, not-in-use external IP allocations** so they can be reviewed as conservative cleanup candidates. + +This rule is deliberately **precision-first**. It is a **review-candidate** rule only, not proof that releasing the address is safe, not proof that no allowlists or DNS dependencies exist, and not proof that the public list price exactly matches the customer bill. + +--- + +## 3. 
GCP Documentation Grounding + +### 3.1 Regional and global address resources expose canonical status and usage fields + +Google documents regional and global Compute Engine address resources with: + +1. `status` values `RESERVING`, `RESERVED`, and `IN_USE` +2. `users[]` as the output-only URLs of resources using the address +3. `addressType` as `INTERNAL` or `EXTERNAL` +4. `ipVersion` as `IPV4` or `IPV6` +5. `purpose` +6. `networkTier` + +Sources: + +- *Resource: addresses* +- *Resource: globalAddresses* + +URLs: + +- https://cloud.google.com/compute/docs/reference/rest/v1/addresses +- https://cloud.google.com/compute/docs/reference/rest/v1/globalAddresses + +Rule consequence: + +1. Current address state must be evaluated from documented control-plane fields. +2. `status == "RESERVED"` is the canonical not-in-use state for this rule. +3. `IN_USE` and `RESERVING` are out of scope. + +### 3.2 GCP bills external IPv4 addresses differently when unused vs in use + +Google documents current external IP pricing as follows: + +1. You are charged for static and ephemeral external IP addresses. +2. A static external IP address that is assigned but unused is charged at **$0.01 per hour**. +3. Static and ephemeral IP addresses in use on standard VMs are charged at **$0.005 per hour**. +4. Static and ephemeral IP addresses used by Cloud NAT are charged at **$0.005 per hour**. +5. Static external IP addresses assigned to forwarding rules are **not charged**. +6. Google considers a static external IP address **in use** when it is associated with a VM instance whether the VM is running or stopped. +7. If a static external IP address is dissociated from the instance or the instance is deleted, Google considers it **not in use**. + +Source: + +- *VPC pricing - External IP address pricing* + +URL: + +- https://cloud.google.com/vpc/pricing#ipaddress + +Rule consequence: + +1. This rule must target the **higher-cost unused static external IPv4** state, not all external IP billing. +2. 
This rule must not treat in-use static IPs as eligible findings even though they are still billed. +3. Addresses attached to forwarding rules must not be emitted as unused. +4. The rule may rely only on documented address control-plane state. +5. It must not invent separate indirect attachment heuristics beyond what the address APIs expose. + +### 3.3 Internal IPs and external IPv6 are out of scope for this billing rule + +Google documents: + +1. There is **no charge** for static or ephemeral internal IP addresses. +2. You are not charged for external IPv6 address ranges assigned to subnets, for external IPv6 addresses assigned to VM instances, or for static regional IPv6 addresses. + +Source: + +- *VPC pricing - Internal IP address pricing / External IP address pricing* + +URL: + +- https://cloud.google.com/vpc/pricing#ipaddress + +Rule consequence: + +1. Internal addresses are out of scope. +2. This rule should be scoped to **external IPv4** only. +3. IPv6 addresses must not be emitted by this rule. + +### 3.4 Aggregated regional inventory supports partial success and warning surfaces + +Google documents `addresses.aggregatedList` as the aggregated regional inventory surface and recommends `returnPartialSuccess=true` to prevent failure. The response can include scoped `warning` data and top-level `unreachables`. + +Source: + +- *Method: addresses.aggregatedList* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/addresses/aggregatedList + +Rule consequence: + +1. Regional address inventory may be enumerated from aggregated inventory. +2. Partial coverage must be surfaced conservatively and must not be treated as proof that the project is clean. + +### 3.5 Cloud NAT automatic NAT IPs are reserved external IPs managed by Cloud NAT + +Google documents that Public NAT automatic allocation: + +1. creates **static (reserved) regional external IP addresses** +2. adds and removes them automatically based on gateway needs +3. 
exposes them in the list of static external IP addresses + +Google also documents address `purpose = NAT_AUTO` for regional external IP addresses used by Cloud NAT automatic NAT IP address allocation. + +Sources: + +- *Cloud NAT - IP addresses and ports* +- *Resource: addresses* + +URLs: + +- https://cloud.google.com/nat/docs/ports-and-addresses +- https://cloud.google.com/compute/docs/reference/rest/v1/addresses + +Rule consequence: + +1. A reserved external IP with `purpose == "NAT_AUTO"` is not an unused customer-held reservation. +2. `NAT_AUTO` addresses must be excluded even if they appear in the static external IP list. + +### 3.6 Network tier is contextual, not a detection primitive + +Google documents: + +1. internal IP addresses are always `PREMIUM` +2. global external IP addresses are always `PREMIUM` +3. regional external IP addresses can be `PREMIUM` or `STANDARD` + +Sources: + +- *Resource: addresses* +- *Resource: globalAddresses* + +URLs: + +- https://cloud.google.com/compute/docs/reference/rest/v1/addresses +- https://cloud.google.com/compute/docs/reference/rest/v1/globalAddresses + +Rule consequence: + +1. `networkTier` may be useful reviewer context. +2. `networkTier` must not override the canonical unused-state contract. + +--- + +## 4. Detection Goal + +Emit only when the address passes every rule in section **8**. Section **8** is the single source of truth for decisioning; sections **7** and **9** define normalization and evaluation contracts. + +Decision precedence is: + +1. normalize scope and required fields +2. apply hard scope and billable-surface exclusions +3. apply canonical unused-state and Cloud NAT exclusions +4. treat `users[]` only as contradictory current-use evidence +5. emit only when no exclusion applies + +--- + +## 5. 
Non-Goals + +This rule does **not** attempt to prove: + +- that releasing the IP is operationally safe +- that DNS, firewall allowlists, or customer integrations no longer depend on the address +- that no imminent re-attachment is intended +- that the address is not intentionally held for manual failover, allowlisting, or cutover use +- that the exact billed amount matches public USD list pricing + +--- + +## 6. Canonical Inputs + +### 6.1 Required surfaces + +| Surface | Purpose | +|---|---| +| `addresses.aggregatedList` | enumerate regional address reservations, including status, users, address type, IP version, purpose, network tier, labels, and warnings | +| `globalAddresses.list` | enumerate global address reservations with the same control-plane fields except regional scope | +| VPC pricing page | authoritative billing semantics for unused static external IPv4 addresses | + +### 6.2 Authentication / permissions + +Required permissions: + +- `compute.addresses.list` +- `compute.globalAddresses.list` + +Typical predefined role: + +- `roles/compute.viewer` + +### 6.3 Thresholds + +This rule has **no user-configurable parameter**. + +Detection is based on current control-plane state, not age. + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `scope_key` | For aggregated regional inventory, supported form is exactly `regions/REGION`. Any other value is unsupported and must skip. | +| `scope` | `"regional"` for `regions/REGION`; `"global"` for global address inventory. | +| `region` | For regional addresses, use the region name from the aggregated scope key. For global addresses, use exact string `"global"`. | +| `status` | Resolve from documented address `status` and compare case-sensitively to canonical values such as `"RESERVED"` and `"IN_USE"`. | +| `address_type` | Resolve from documented `addressType` and compare case-sensitively to exact `"EXTERNAL"`. Unknown / unresolved must skip. 
| +| `ip_version` | Resolve from documented `ipVersion` and compare case-sensitively to exact `"IPV4"`. Unknown / unresolved must skip. | +| `purpose` | Preserve exact documented purpose string when present. | +| `users` | Treat `users[]` as contextual current-use evidence. A non-empty list means in-use evidence and must skip. Missing or empty does not override status. | +| `network_tier` | Preserve exact documented value when present; if absent, preserve as unknown context rather than guessing. | +| `labels` | `address.labels or {}` - never `None` in output. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | address record is malformed or `name` absent / empty | Skip | +| 8.2 | regional aggregated scope key is unsupported or malformed | Skip | +| 8.3 | region filter is set and the normalized regional scope does not match | Skip | +| 8.4 | address is global and a region filter is set | Skip | +| 8.5 | `status` is absent, unknown, or not exactly `"RESERVED"` | Skip | +| 8.6 | `addressType` is absent, unknown, or not exactly `"EXTERNAL"` | Skip | +| 8.7 | `ipVersion` is absent, unknown, or not exactly `"IPV4"` | Skip | +| 8.8 | `purpose == "NAT_AUTO"` | Skip | +| 8.9 | `users[]` resolves to one or more entries | Skip | +| 8.10 | all required signals resolve and the address is an external IPv4 reservation in `RESERVED` state with no contradictory current-use evidence | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Inventory contract + +Required behavior: + +1. Enumerate regional addresses from `addresses.aggregatedList`. +2. Use `returnPartialSuccess=true` for aggregated regional inventory. +3. Enumerate global addresses from `globalAddresses.list` unless a region filter is active. +4. Treat aggregated scope keys of the form `regions/REGION` as regional. +5. Skip any unexpected regional scope kind. +6. 
If aggregated inventory returns scoped warnings, top-level warnings, or `unreachables`, implementations must not silently treat the result as complete project coverage. +7. Under partial aggregated coverage, item-level findings from successfully enumerated scopes may still be emitted, but zero findings must not be interpreted as a clean project. + +### 9.2 Unused-state contract + +Required behavior: + +1. `status == "RESERVED"` is the canonical eligible state. +2. `status == "IN_USE"` must skip. +3. `status == "RESERVING"` must skip. +4. Unknown or unresolved status must skip. + +Rationale: + +Google explicitly documents `RESERVED` as currently reserved and available to use, while `IN_USE` is currently being used by another resource. + +### 9.3 Billable-scope contract + +Required behavior: + +1. Only `addressType == "EXTERNAL"` is in scope. +2. Only `ipVersion == "IPV4"` is in scope. +3. Internal addresses must skip. +4. IPv6 addresses must skip. + +Rationale: + +Google explicitly documents no charge for internal IP addresses and no-charge external IPv6 cases relevant to this rule surface. This rule is therefore scoped to the billed unused static external IPv4 contract. + +### 9.4 Cloud NAT exclusion contract + +Required behavior: + +1. If `purpose == "NAT_AUTO"`, skip. +2. `NAT_AUTO` must be treated as Cloud NAT automatic allocation, not as a customer-held unused reservation. + +### 9.5 `users[]` contract + +Required behavior: + +1. A non-empty `users[]` list is contradictory current-use evidence and must skip. +2. Empty or absent `users[]` does not create eligibility by itself; canonical eligibility still depends on section `9.2`. +3. `users[]` is supportive evidence only, not a substitute for the documented `status` contract. +4. Implementations must not separately traverse indirect dependency chains such as forwarding rules, target proxies, or backend services. +5. 
A future rule revision may add those surfaces only if it is backed by an official documented contract. + +### 9.6 Global / region-filter contract + +Required behavior: + +1. Regional addresses participate in exact region filtering by normalized regional name. +2. Global addresses have no regional scope and must be skipped when a region filter is active. + +### 9.7 Cost model contract + +Required behavior: + +1. `estimated_monthly_cost_usd = 7.30` +2. The estimate must be derived from Google’s documented **$0.01/hour** price for a static IP address that is assigned but unused, using a normalized **730-hour month**. +3. The summary/evidence must make clear that this is an **estimated** public USD list-price monthly equivalent derived from hourly pricing, not contract-specific billing. +4. The monthly figure is a rounded estimate for comparability across rules; authoritative billing remains hourly. +5. Do not use the lower in-use rates for standard VMs, Spot/preemptible VMs, Cloud NAT, or forwarding-rule attachments. + +Rationale: + +Google’s current pricing page explicitly documents the higher unused static external IPv4 hourly rate. The rule’s monthly estimate may therefore be derived from that rate using a normalized 730-hour month as a rounded cross-rule comparison figure, while still disclosing that actual billing remains hourly and can vary by currency, contract, or exact calendar month length. + +### 9.8 Failure behavior contract + +Required behavior: + +1. Permission failures for regional inventory should surface as a permission error, not silent empty findings. +2. Permission failures for global inventory should also surface as a permission error during full-scope scans; they must not silently degrade to regional-only coverage. +3. If the Compute Engine API for addresses is unavailable / disabled for the project, returning no findings is acceptable. +4. Malformed address records should be skipped item-by-item rather than failing the whole rule. +5. 
Partial aggregated regional coverage must be surfaced as incomplete coverage or degraded scan state; it must not silently collapse into a clean no-findings outcome. + +--- + +## 10. Confidence and Risk + +### 10.1 Confidence + +| Condition | Confidence | +|---|---| +| Finding emitted | `HIGH` | + +Rationale: + +Google documents `RESERVED` vs `IN_USE` control-plane state explicitly, so current unused reservation state is a high-confidence signal. + +### 10.2 Risk + +| Condition | Risk | +|---|---| +| Finding emitted | `LOW` | + +Rationale: + +Unused reserved external IPs are usually low direct operational risk to review, but they are still not automatically safe to release. + +--- + +## 11. Finding Shape + +### 11.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"gcp"` | +| `rule_id` | `"gcp.compute.ip.unused"` | +| `resource_type` | `gcp.compute.address` for regional, `gcp.compute.global_address` for global | +| `resource_id` | canonical project/scope address path | +| `region` | regional name for regional addresses; `"global"` for global addresses | +| `confidence` | `HIGH` | +| `risk` | `LOW` | +| `estimated_monthly_cost_usd` | `7.30` | + +### 11.2 Required evidence + +`signals_used` must clearly disclose: + +1. address `status` is `RESERVED` +2. address type is external +3. IP version is IPv4 +4. whether the address is regional or global +5. if known, the network tier +6. that the monthly cost is an estimate derived from public USD list pricing + +`signals_not_checked` should include remaining blind spots such as: + +1. imminent re-attachment intent +2. DNS / firewall allowlist / customer integration dependencies +3. operational reserve, cutover, or manual failover intent +4. 
contract-specific or non-USD billing differences + +### 11.3 Required details + +Details should include at least: + +- `address_name` +- `ip_address` +- `scope` +- `is_regional` +- `address_type` +- `ip_version` +- `network_tier` +- `purpose` +- `creation_timestamp` +- `labels` + +--- + +## 12. Failure Behavior + +- Regional inventory permission denied -> raise permission error +- Global inventory permission denied during full-scope scan -> raise permission error +- Compute Engine API disabled / not found for the project -> return no findings +- Malformed or unsupported scoped address records -> skip those items +- Partial aggregated regional inventory coverage -> do not treat zero findings as proof of full clean coverage diff --git a/docs/specs/gcp/snapshot_old.md b/docs/specs/gcp/snapshot_old.md new file mode 100644 index 0000000..d85afd8 --- /dev/null +++ b/docs/specs/gcp/snapshot_old.md @@ -0,0 +1,444 @@ +# GCP Rule Spec - `gcp.compute.snapshot.old` + +## 1. Rule Identity + +- **Rule ID:** `gcp.compute.snapshot.old` +- **Provider:** GCP +- **Resource type:** Compute Engine snapshot +- **Finding resource_type:** `gcp.compute.snapshot` + +--- + +## 2. Intent + +Detect **old standard snapshot resources that are conservative cleanup review candidates** after excluding stronger Google-documented signals that the snapshot is part of an intentional automated backup workflow. + +This rule is deliberately **precision-first**. It is a **review-candidate** rule only, not proof that a snapshot is unused, not proof that deleting it will reduce cost proportionally, and not proof that it is safe to remove. + +--- + +## 3. GCP Documentation Grounding + +### 3.1 Snapshot resource exposes canonical lifecycle, age, and storage fields + +Google documents the Compute Engine `Snapshot` resource with fields including: + +1. `creationTimestamp` +2. `status` +3. `sourceDisk` +4. `sourceDiskId` +5. `diskSizeGb` +6. `storageBytes` +7. `storageBytesStatus` +8. `storageLocations` +9. 
`autoCreated` +10. `chainName` +11. `sourceSnapshotSchedulePolicy` +12. `sourceSnapshotSchedulePolicyId` +13. `snapshotType` + +Source: + +- *Resource: snapshots* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/snapshots + +Rule consequence: + +1. Age must be based on documented `creationTimestamp`. +2. Only stable snapshot lifecycle states should be evaluated. +3. `diskSizeGb` and `storageBytes` are different signals and must not be conflated. + +### 3.2 Only `READY` snapshots are stably evaluable + +Google documents snapshot `status` values including: + +- `CREATING` +- `DELETING` +- `FAILED` +- `READY` +- `UPLOADING` + +Source: + +- *Resource: snapshots* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/snapshots + +Rule consequence: + +1. Only `status == "READY"` is eligible. +2. `CREATING`, `DELETING`, `FAILED`, and `UPLOADING` must skip. + +### 3.3 Standard snapshots are incremental and deleting one does not necessarily reclaim all of its data + +Google documents: + +1. Standard snapshots are incremental. +2. `storageBytes` is the storage used by the snapshot and can change as snapshots are created or deleted. +3. Because subsequent snapshots can depend on previous snapshots, deleting a snapshot does not necessarily delete all data on that snapshot. + +Sources: + +- *Resource: snapshots* +- *About archive and standard disk snapshots* + +URLs: + +- https://cloud.google.com/compute/docs/reference/rest/v1/snapshots +- https://cloud.google.com/compute/docs/disks/snapshots + +Rule consequence: + +1. Age alone is not a direct cost-reclaim signal. +2. `diskSizeGb` must not be treated as billed snapshot size. +3. Even `storageBytes` must not be converted into a confident monthly saving estimate without a pricing model that fully matches snapshot type and storage location. + +### 3.4 Snapshot schedules are a documented intentional backup workflow + +Google documents that snapshot schedules: + +1. 
create standard snapshots at scheduled intervals +2. are a best practice for regular disk backup +3. can retain auto-generated snapshots indefinitely if no retention policy is configured +4. expose lifecycle metadata through snapshot fields such as `autoCreated`, `sourceSnapshotSchedulePolicy`, and `sourceSnapshotSchedulePolicyId` + +Sources: + +- *About snapshot schedules for disks* +- *Resource: snapshots* + +URLs: + +- https://cloud.google.com/compute/docs/disks/about-snapshot-schedules +- https://cloud.google.com/compute/docs/reference/rest/v1/snapshots + +Rule consequence: + +1. Schedule-created snapshots are a strong intentional-backup signal. +2. To reduce false positives, snapshots with explicit schedule-created evidence should be excluded from this rule. + +### 3.5 Archive snapshots are a distinct low-cost, long-term retention class + +Google documents: + +1. Standard and archive snapshots are different snapshot types. +2. Archive snapshots are intended for compliance, audit, and long-term cold storage. +3. Archive snapshots are lower-cost and optimized for long retention rather than fast restore. + +Source: + +- *About archive and standard disk snapshots* + +URL: + +- https://cloud.google.com/compute/docs/disks/snapshots + +Rule consequence: + +1. Archive snapshots are not strong candidates for an “old snapshot” hygiene rule. +2. Archive snapshots should be excluded when the snapshot type is explicitly known. + +### 3.6 Snapshot scope and storage location are not the same thing + +Google documents: + +1. Standard snapshots are globally scoped by default, and regionally scoped snapshots also exist. +2. `storageLocations` describes where snapshot data is stored. +3. The snapshot resource does not expose a simple canonical `region` field comparable to zonal or regional Compute resources. + +Source: + +- *About archive and standard disk snapshots* + +URL: + +- https://cloud.google.com/compute/docs/disks/snapshots + +Rule consequence: + +1. 
This rule should treat snapshots as a project-level inventory surface. +2. `region_filter` should be ignored rather than guessed from `storageLocations`. +3. `storageLocations` is context only and must not be treated as a region-filter surrogate. + +### 3.7 Snapshot pricing varies by snapshot type, scope, and storage location + +Google documents that snapshot pricing lives on the Compute Engine disk and image pricing page, with prices listed in USD and region/currency-specific values available via SKUs and billing surfaces. + +Source: + +- *Disk and image pricing* + +URL: + +- https://cloud.google.com/compute/disks-image-pricing#disk_snapshots + +Rule consequence: + +1. This rule must not hardcode a flat monthly estimate such as `$2.60` or `$0.026/GB`. +2. `estimated_monthly_cost_usd` should remain `None` unless a future implementation uses a documented pricing model that correctly incorporates snapshot type and storage location. + +--- + +## 4. Detection Goal + +Emit only when the snapshot passes every rule in section **8**. Section **8** is the single source of truth for decisioning; sections **7** and **9** define normalization and evaluation contracts. + +Decision precedence is: + +1. normalize required fields +2. apply hard lifecycle and age exclusions +3. exclude archive and schedule-created snapshots +4. emit only when no exclusion applies + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that deleting the snapshot is safe +- that deleting the snapshot will reduce cost proportionally +- that the snapshot is unused or orphaned +- that the source disk no longer exists +- that the snapshot is not required for backup, audit, DR, or compliance + +--- + +## 6. 
Canonical Inputs + +### 6.1 Required surfaces + +| Surface | Purpose | +|---|---| +| `snapshots.list` | enumerate snapshot resources and their lifecycle, age, billed-storage, schedule, and type metadata | +| Disk/image pricing page | authoritative pricing model source for snapshot pricing variability | + +### 6.2 Authentication / permissions + +Required permission: + +- `compute.snapshots.list` + +Typical predefined role: + +- `roles/compute.viewer` + +### 6.3 Thresholds + +| Parameter | Meaning | +|---|---| +| `max_age_days` | Review threshold in days; default `90` | + +This is a product-policy review threshold, not a Google-defined idle threshold. + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `status` | Resolve from documented snapshot `status` and compare case-sensitively to canonical values such as `"READY"` and `"FAILED"`. | +| `creation_timestamp` | Parse from documented RFC3339 `creationTimestamp`. Unparseable values are unusable. | +| `age_days` | Compute `age_days` as whole UTC days between `now` and parsed `creation_timestamp`. | +| `snapshot_type` | Preserve exact documented `snapshotType` when present; if absent, preserve as unknown rather than guessing. | +| `auto_created` | Preserve exact boolean from `autoCreated`; absent means unknown. | +| `source_snapshot_schedule_policy` | Preserve exact string when present. | +| `source_snapshot_schedule_policy_id` | Preserve exact string when present. | +| `source_disk` | Preserve exact documented value for context only. | +| `source_disk_id` | Preserve exact documented value for context only. | +| `disk_size_gb` | Parse as non-negative integer when possible; otherwise preserve unknown/`0` for context only. | +| `storage_bytes` | Parse as non-negative integer when possible; otherwise preserve `0` for context only. | +| `storage_bytes_status` | Preserve exact documented value when present. 
| +| `storage_locations` | Preserve as list for context only; never use as region-filter proxy. | +| `chain_name` | Preserve exact documented `chainName` when present; context only. | +| `labels` | `snapshot.labels or {}` - never `None` in output. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | snapshot record is malformed or `name` absent / empty | Skip | +| 8.2 | `status` is absent, unknown, or not exactly `"READY"` | Skip | +| 8.3 | `creationTimestamp` is absent or unparsable | Skip | +| 8.4 | `age_days < max_age_days` | Skip | +| 8.5 | `snapshotType == "ARCHIVE"` | Skip | +| 8.6 | `sourceSnapshotSchedulePolicy` or `sourceSnapshotSchedulePolicyId` is present and non-empty | Skip | +| 8.7 | `autoCreated == true` | Skip | +| 8.8 | all required signals resolve and no exclusion conditions apply | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Inventory contract + +Required behavior: + +1. Enumerate snapshots from `snapshots.list`. +2. Fully iterate the paged result; do not stop after the first page. +3. Ignore `region_filter` because the snapshot resource does not expose a canonical comparable region field for this rule. + +### 9.2 Lifecycle contract + +Required behavior: + +1. Only `status == "READY"` is eligible. +2. `CREATING`, `DELETING`, `FAILED`, and `UPLOADING` must skip. +3. Unknown or unresolved status must skip. + +### 9.3 Age contract + +Required behavior: + +1. Parse `creationTimestamp` as RFC3339. +2. Use the current UTC system time as `now`. +3. Compute `age_days` as whole UTC days. +4. Emit only when `age_days >= max_age_days`. +5. If `creationTimestamp` cannot be parsed, skip rather than guess. + +### 9.4 Schedule-created exclusion contract + +Required behavior: + +1. A non-empty `sourceSnapshotSchedulePolicy` must skip. +2. A non-empty `sourceSnapshotSchedulePolicyId` must skip. +3. `autoCreated == true` must skip. 
+ +Rationale: + +Google documents snapshot schedules as intentional recurring backups and exposes schedule-origin metadata directly on the snapshot resource. + +### 9.5 Archive-snapshot exclusion contract + +Required behavior: + +1. If `snapshotType` is present and exactly `"ARCHIVE"`, skip. +2. If `snapshotType` is absent, do not infer archive status. + +Rationale: + +Google documents archive snapshots as a separate low-cost long-retention class intended for compliance/audit/cold-storage use. + +### 9.6 Source-disk contract + +Required behavior: + +1. `sourceDisk` and `sourceDiskId` may appear in evidence/details only. +2. Do **not** infer “source disk deleted” from empty or missing `sourceDisk`. +3. Do **not** raise confidence solely because `sourceDisk` is absent. + +### 9.7 Cost model contract + +Required behavior: + +1. `estimated_monthly_cost_usd = None` +2. Do **not** estimate cost from `diskSizeGb`. +3. Do **not** estimate cost from age. +4. Do **not** hardcode a flat per-GB monthly rate. +5. `storageBytes` may appear as billed-storage context only, including when it is very small or zero. +6. If `storageBytesStatus` is present, it should also be surfaced as context because the billed-storage value can be changing. + +Rationale: + +Google documents `storageBytes` as billed storage used by the snapshot and documents that it can change as snapshots are created or deleted. Google also documents snapshot pricing on the pricing page rather than in the resource itself, and pricing varies by snapshot type and storage location. + +### 9.8 Confidence contract + +Required behavior: + +| Condition | Confidence | +|---|---| +| Finding emitted | `LOW` | + +Rationale: + +Even after excluding archive and schedule-created snapshots, age alone does not prove that a snapshot is waste, unused, or safe to delete. 
+ +### 9.9 Risk contract + +Required behavior: + +| Condition | Risk | +|---|---| +| Finding emitted | `LOW` | + +### 9.10 Failure behavior contract + +Required behavior: + +1. Permission failures for snapshot inventory should surface as a permission error, not silent empty findings. +2. If the Compute Engine API for snapshots is unavailable / disabled for the project, returning no findings is acceptable. +3. Malformed snapshot records should be skipped item-by-item rather than failing the whole rule. + +--- + +## 10. Finding Shape + +### 10.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"gcp"` | +| `rule_id` | `"gcp.compute.snapshot.old"` | +| `resource_type` | `"gcp.compute.snapshot"` | +| `resource_id` | canonical project/global snapshot path | +| `region` | `"global"` | +| `confidence` | `LOW` | +| `risk` | `LOW` | +| `estimated_monthly_cost_usd` | `None` | + +### 10.2 Required evidence + +`signals_used` must clearly disclose: + +1. snapshot `status` is `READY` +2. snapshot age in days +3. threshold in days +4. if present, `storageBytes` and `storageBytesStatus` as context only +5. if present, snapshot type as context only +6. if present, snapshot is part of a named incremental chain when `chain_name` is present (from `chainName`) + +`signals_not_checked` should include remaining blind spots such as: + +1. business/application retention intent +2. DR / audit / compliance intent +3. snapshot restore frequency or operational usage was not evaluated +4. whether deleting this snapshot would materially reduce billed storage +5. 
exact monthly pricing from current storage location and snapshot type + +### 10.3 Required details + +Details should include at least: + +- `snapshot_name` +- `created_at` +- `age_days` +- `max_age_days_threshold` +- `disk_size_gb` +- `storage_bytes` +- `storage_bytes_status` +- `storage_locations` +- `snapshot_type` +- `auto_created` +- `source_snapshot_schedule_policy` when present +- `source_snapshot_schedule_policy_id` when present +- `source_disk` when present +- `source_disk_id` when present +- `chain_name` when present +- `labels` + +--- + +## 11. Failure Behavior + +- Snapshot inventory permission denied -> raise permission error +- Compute Engine API disabled / not found for the project -> return no findings +- Malformed snapshot records -> skip those items +- `region_filter` ignored -> do not guess from storage locations diff --git a/docs/specs/gcp/sql_instance_idle.md b/docs/specs/gcp/sql_instance_idle.md new file mode 100644 index 0000000..8195861 --- /dev/null +++ b/docs/specs/gcp/sql_instance_idle.md @@ -0,0 +1,492 @@ +# GCP Rule Spec - `gcp.sql.instance.idle` + +## 1. Rule Identity + +- **Rule ID:** `gcp.sql.instance.idle` +- **Provider:** GCP +- **Resource type:** Cloud SQL instance +- **Finding resource_type:** `gcp.sql.instance` + +--- + +## 2. Intent + +Detect **primary Cloud SQL instances** that show **no observed active database connections** for the full configured idle window and therefore represent conservative review candidates for cleanup, stop/start reconsideration, or rightsizing. + +This rule is deliberately **precision-first**. It is a **review-candidate** rule only, not proof that the instance is safe to delete, not proof that no business continuity purpose exists, and not proof of a specific monthly saving. + +--- + +## 3. GCP Documentation Grounding + +### 3.1 Cloud SQL instance resource exposes the canonical control-plane fields + +Google documents the Cloud SQL `DatabaseInstance` resource with fields including: + +1. `state` +2. 
`instanceType` +3. `name` +4. `region` +5. `databaseVersion` +6. `createTime` +7. `masterInstanceName` +8. `settings` +9. `failoverReplica` +10. `replicaNames` +11. `tags` + +Within `settings`, Google documents fields including: + +1. `tier` +2. `availabilityType` +3. `dataDiskSizeGb` +4. `dataDiskType` +5. `userLabels` +6. backup configuration fields + +Source: + +- *Resource: instances* + +URL: + +- https://cloud.google.com/sql/docs/mysql/admin-api/rest/v1beta4/instances + +Rule consequence: + +1. Instance eligibility must be based on documented Cloud SQL Admin API fields, not inferred state. +2. `createTime` is the canonical age signal for full-window coverage. +3. Tier / HA / storage fields are valid context, but not enough for a trustworthy fixed cost estimate. + +### 3.2 Cloud SQL documents a canonical active-connections metric + +Google documents Cloud SQL metrics exposed through Google Cloud Observability, including: + +- `cloudsql.googleapis.com/database/active_connections` + +Google documents these metrics on the `cloudsql_database` monitored resource and documents that Cloud SQL metrics are sampled every 60 seconds and can be delayed by up to 165 seconds before visibility. + +Google documents the `cloudsql_database` monitored resource with identity labels including: + +1. `project_id` +2. `location` +3. `resource_id` +4. `database` + +Sources: + +- *Cloud SQL metrics* +- *Monitored resource list* + +URLs: + +- https://docs.cloud.google.com/sql/docs/mysql/admin-api/metrics +- https://cloud.google.com/monitoring/api/resources#tag_cloudsql_database + +Rule consequence: + +1. The idle rule must use the documented `database/active_connections` metric, not undocumented or stale alternatives. +2. The observation window must account for documented metric visibility lag. +3. Metric-to-instance matching must use documented monitored-resource identity labels only, with no best-guess fallback. +4. 
If the metric cannot be resolved reliably for an instance, that instance must be skipped rather than emitted. + +### 3.3 Read replicas are operationally special even though they are billed + +Google documents that read replicas: + +1. offload read queries and analytics traffic from the primary +2. can be promoted for disaster recovery or corruption recovery +3. are read-only copies updated in near real time + +Google pricing documentation also states: + +1. read replicas and failover replicas are charged at the same rate as stand-alone instances + +Sources: + +- *Replication* +- *Cloud SQL pricing* + +URLs: + +- https://cloud.google.com/sql/docs/mysql/replication +- https://cloud.google.com/sql/pricing + +Rule consequence: + +1. Replica-shaped instances should be treated conservatively because zero client connections does not prove lack of operational value. +2. This rule should exclude documented read-replica shapes even though they remain billable. + +### 3.4 HA / regional instances remain billable and operationally important + +Google documents that: + +1. a high-availability Cloud SQL instance is a regional instance with a primary and standby +2. the standby instance cannot be used for read queries +3. an HA-configured instance costs about twice as much as a standalone instance + +Source: + +- *High availability* + +URL: + +- https://cloud.google.com/sql/docs/mysql/high-availability + +Rule consequence: + +1. HA does **not** make an instance ineligible. +2. HA materially increases cost/risk context and should be surfaced in evidence/details when present. + +### 3.5 Cloud SQL pricing is too variable for a fixed rule-time estimate + +Google documents that Cloud SQL pricing varies by factors including: + +1. edition +2. region +3. vCPU and memory +4. high availability +5. storage and networking +6. commitment model +7. engine / licensing surface + +Source: + +- *Cloud SQL pricing* + +URL: + +- https://cloud.google.com/sql/pricing + +Rule consequence: + +1. 
The rule must not hardcode a stale tier lookup table or a single-region estimate. +2. `estimated_monthly_cost_usd` should remain `None` unless a future implementation computes pricing from documented current pricing inputs. + +--- + +## 4. Detection Goal + +Emit a finding only when **all** of the following are true: + +1. `instance.name` is present and non-empty +2. `instance.region` is present and non-empty +3. the optional region filter matches the normalized region +4. `instance.state` resolves to exactly `"RUNNABLE"` +5. `instance.instanceType` resolves to exactly `"CLOUD_SQL_INSTANCE"` +6. replica exclusion contract is **not** triggered +7. `createTime` is present, parseable, and old enough to cover the full observation window +8. the Cloud Monitoring metric contract resolves reliably for the instance +9. the maximum observed `active_connections` value is exactly zero for the full observation window + +If any required signal cannot be established reliably, skip rather than emit. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that deleting the instance is safe +- that the instance is not needed for DR, failover, migration, or future reactivation +- that short-lived burst traffic never occurred between metric samples +- that zero active client connections implies zero internal or background workload +- that the instance has zero storage, backup, or network cost +- that the instance produces a specific monthly saving + +--- + +## 6. 
Canonical Inputs + +### 6.1 Required control-plane surface + +| Surface | Purpose | +|---|---| +| `instances.list` | enumerate Cloud SQL instances and their lifecycle, type, region, age, tier, HA, storage, and replica context | + +### 6.2 Required monitoring surface + +| Surface | Purpose | +|---|---| +| `cloudsql.googleapis.com/database/active_connections` on `cloudsql_database` | determine whether any active connections were observed during the idle window | + +### 6.3 Idle window + +| Parameter | Meaning | +|---|---| +| `idle_days` | Review threshold in days; default `14` | + +Window definition: + +1. `window_end` must account for documented metric visibility lag and therefore should be set conservatively after the latest potentially delayed point (for example, `now - 5 minutes`) +2. `window_start = window_end - idle_days` + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `name` | Non-empty string or unusable. | +| `region` | Preserve exact documented region string; compare by exact string equality only. | +| `state` | Resolve from documented instance `state` and compare case-sensitively to canonical values such as `"RUNNABLE"`, `"SUSPENDED"`, `"MAINTENANCE"`, and `"FAILED"`. | +| `instance_type` | Resolve from documented `instanceType` and compare case-sensitively to canonical values such as `"CLOUD_SQL_INSTANCE"`, `"READ_REPLICA_INSTANCE"`, and `"ON_PREMISES_INSTANCE"`. | +| `create_time` | Parse from documented RFC3339 `createTime`. Unparseable values are unusable. | +| `master_instance_name` | Preserve exact documented string when present. | +| `database_version` | Preserve exact documented value when present; otherwise unknown. | +| `tier` | Preserve exact `settings.tier` when present; otherwise unknown. | +| `availability_type` | Preserve exact `settings.availabilityType` when present; otherwise unknown. | +| `data_disk_size_gb` | Parse as non-negative integer when possible; otherwise preserve unknown/`0` for context only. 
| +| `data_disk_type` | Preserve exact `settings.dataDiskType` when present; otherwise unknown. | +| `backup_retained_count` | Parse as non-negative integer when possible; otherwise unknown. | +| `labels` | `settings.userLabels or {}` - never `None` in output. | +| `metric_coverage` | `FULL` only when the full idle window is covered within documented sampling/visibility tolerance; otherwise unresolved. | +| `active_connections_max` | Maximum numeric value observed from the documented active-connections metric over the full eligible window. If unresolved, the metric is unusable. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | instance record malformed or `name` absent / empty | Skip | +| 8.2 | `region` absent / empty | Skip | +| 8.3 | region filter set and `region` does not exactly match | Skip | +| 8.4 | `state` absent, unknown, or not exactly `"RUNNABLE"` | Skip | +| 8.5 | `instanceType` absent, unknown, or not exactly `"CLOUD_SQL_INSTANCE"` | Skip | +| 8.6 | replica exclusion contract is triggered | Skip | +| 8.7 | `createTime` absent or unparsable | Skip | +| 8.8 | instance creation time is newer than `window_start` | Skip | +| 8.9 | active-connections metric cannot be resolved reliably for the full window | Skip | +| 8.10 | `active_connections_max > 0` anywhere in the full window | Skip | +| 8.11 | all required signals resolve and `active_connections_max == 0` for the full window | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Inventory contract + +Required behavior: + +1. Enumerate instances from `instances.list`. +2. Fully iterate paged results if pagination is present. +3. Malformed instance records must be skipped item-by-item rather than failing the whole rule. 
+ +### 9.2 Serving-state contract + +Google documents Cloud SQL instance states including: + +- `RUNNABLE` +- `SUSPENDED` +- `PENDING_DELETE` +- `PENDING_CREATE` +- `MAINTENANCE` +- `FAILED` +- `ONLINE_MAINTENANCE` + +Required behavior: + +1. Only `state == "RUNNABLE"` is eligible. +2. `MAINTENANCE` and `ONLINE_MAINTENANCE` must skip because they can show temporarily low or zero connections during service-managed transitions. +3. Unknown or any other state must skip. + +### 9.3 Primary-instance contract + +Required behavior: + +1. Only `instanceType == "CLOUD_SQL_INSTANCE"` is eligible. +2. `READ_REPLICA_INSTANCE` must skip. +3. `ON_PREMISES_INSTANCE` must skip. + +### 9.4 Replica exclusion contract + +Required behavior: + +1. If `instanceType == "READ_REPLICA_INSTANCE"`, skip. +2. If `masterInstanceName` is present and non-empty, treat the instance as replica-shaped and skip. +3. `replicaNames` on its own does **not** make a primary instance ineligible. + +Rationale: + +Google documents read replicas as read-offload and disaster-recovery resources. Zero observed active connections does not prove lack of value for these replica-shaped instances. + +### 9.5 Age / full-window coverage contract + +Required behavior: + +1. Parse `createTime` as RFC3339. +2. Compute `window_end` with a conservative buffer for documented metric visibility lag. +3. Compute `window_start = window_end - idle_days`. +4. Emit only if `create_time <= window_start`. +5. Partial-window evaluation is not allowed. +6. If `createTime` cannot be parsed, skip rather than guess. + +### 9.6 Monitoring metric contract + +Required behavior: + +1. Query the documented metric `cloudsql.googleapis.com/database/active_connections`. +2. Query it on the documented `cloudsql_database` monitored resource. +3. 
Match time series to the Cloud SQL instance only by exact documented monitored-resource identity labels: + - `project_id == target project ID` + - `location == instance.region` + - `resource_id == instance.name` (case-sensitive exact match required) +4. Use the `database` monitored-resource label only as a series dimension; do not use it to best-guess an instance match when the identity labels do not match exactly. +5. If the identity labels do not match exactly, skip rather than guess. +6. Use the full observation window defined in section **6.3**. +7. No time series returned for the instance means unresolved coverage and must skip. +8. Missing points, large missing chunks, parse failures, or query failures for that instance mean unresolved coverage and must skip. +9. Small gaps consistent with documented sampling and visibility lag may be tolerated; partial-window or materially sparse coverage must not be treated as idle. +10. Do **not** substitute undocumented or stale metric names such as `cloudsql.googleapis.com/database/network/connections`. +11. Do **not** substitute CPU metrics, connection-attempt metrics, or other fallback signals for the documented active-connections metric. + +### 9.7 Idle decision contract + +Required behavior: + +1. Aggregate across **all** matched `active_connections` time series for the instance, including all matched `database` label variants. +2. Resolve the maximum observed `active_connections` value across all matched series and across the full eligible window. +3. If any matched point has a value greater than zero, treat the instance as active. +4. Emit only when the maximum observed value is exactly zero for the full eligible window. + +Rationale: + +The documented metric is a GAUGE sampled periodically. Zero observed active connections for the full eligible window is a strong review signal, but it is still not proof that no short-lived work occurred between samples or that the engine had no internal/background activity. 
+ +### 9.8 Region-filter contract + +Required behavior: + +1. Compare the optional `region_filter` only to the documented Cloud SQL instance `region`. +2. Do **not** derive region from monitoring labels, zones, or IP metadata when the control-plane region is absent or unusable. + +### 9.9 HA context contract + +Required behavior: + +1. `availabilityType == "REGIONAL"` should not exclude the instance. +2. HA / regional context should be surfaced in evidence/details when present. + +### 9.10 Cost model contract + +Required behavior: + +1. `estimated_monthly_cost_usd = None` +2. Do **not** use a hardcoded tier-to-price lookup table. +3. Do **not** use a single-region default pricing assumption. +4. Do **not** estimate total cost from tier alone. +5. Tier, HA, storage, and backup configuration may appear as context only. + +Rationale: + +Google documents Cloud SQL pricing as varying by edition, region, compute shape, HA, storage, networking, and commitment model. A stale or partial lookup table is not trustworthy enough for canonical rule output. + +### 9.11 Confidence contract + +Required behavior: + +| Condition | Confidence | +|---|---| +| Finding emitted | `HIGH` | + +Rationale: + +Zero observed active connections over the full eligible monitoring window is a strong Cloud Monitoring-backed idle signal, provided the metric contract resolves reliably. + +### 9.12 Risk contract + +Required behavior: + +| Condition | Risk | +|---|---| +| Finding emitted | `HIGH` | + +Rationale: + +Database resources are high-blast-radius assets. Even clearly idle-looking Cloud SQL instances can still carry application, migration, DR, or compliance importance. + +### 9.13 Failure behavior contract + +Required behavior: + +1. `cloudsql.instances.list` permission failures should surface as a permission error. +2. `monitoring.timeSeries.list` permission failures should surface as a permission error. +3. 
If the Cloud SQL Admin API is unavailable / disabled for the project, returning no findings is acceptable. +4. If Cloud Monitoring is unavailable for the project, returning no findings is acceptable. +5. Per-instance metric resolution failures should skip that instance rather than emitting from incomplete evidence. + +--- + +## 10. Finding Shape + +### 10.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"gcp"` | +| `rule_id` | `"gcp.sql.instance.idle"` | +| `resource_type` | `"gcp.sql.instance"` | +| `resource_id` | canonical project/instance path | +| `region` | instance region | +| `confidence` | `HIGH` | +| `risk` | `HIGH` | +| `estimated_monthly_cost_usd` | `None` | + +### 10.2 Required evidence + +`signals_used` must clearly disclose: + +1. instance state is `RUNNABLE` +2. instance type is primary Cloud SQL (`CLOUD_SQL_INSTANCE`) +3. metric coverage is full for the configured window +4. no observed active connections over the configured window +5. `active_connections_max = 0` +6. the exact idle window in days +7. database version +8. tier when present +9. HA / regional context when present +10. storage / backup context when present + +`signals_not_checked` should include remaining blind spots such as: + +1. short-lived workload bursts between metric samples were not evaluated +2. business / application retention intent +3. migration, failback, or future reactivation intent +4. storage, backup, and network savings were not estimated +5. 
engine-specific internal work not represented by active client connections alone + +### 10.3 Required details + +Details should include at least: + +- `instance_name` +- `database_version` +- `tier` +- `region` +- `instance_type` +- `created_at` +- `idle_days_threshold` +- `metric_coverage` +- `active_connections_max` +- `ha_enabled` +- `labels` + +When present, details should also include: + +- `master_instance_name` +- `availability_type` +- `data_disk_size_gb` +- `data_disk_type` +- `backup_retained_count` + +--- + +## 11. Failure Behavior + +- Cloud SQL list permission denied -> raise permission error +- Monitoring permission denied -> raise permission error +- Cloud SQL Admin API disabled / not found -> return no findings +- Cloud Monitoring unavailable -> return no findings +- Malformed instance record -> skip that item +- Unusable per-instance activity metric -> skip that item diff --git a/docs/specs/gcp/vm_stopped.md b/docs/specs/gcp/vm_stopped.md new file mode 100644 index 0000000..369ea66 --- /dev/null +++ b/docs/specs/gcp/vm_stopped.md @@ -0,0 +1,482 @@ +# GCP Rule Spec - `gcp.compute.vm.stopped` + +## 1. Rule Identity + +- **Rule ID:** `gcp.compute.vm.stopped` +- **Provider:** GCP +- **Resource type:** Compute Engine VM instance +- **Finding resource_type:** `gcp.compute.instance` + +--- + +## 2. Intent + +Detect **Compute Engine VM instances in the documented stopped lifecycle state** that have remained stopped for at least the configured threshold and therefore represent conservative review candidates for cleanup of lingering attached-cost surfaces. + +This rule is deliberately **precision-first**. It is a **review-candidate** rule only, not proof that the VM is abandoned, not proof that attached resources should be deleted, and not proof of a specific monthly saving. 
+ +### 2.1 Canonical definitions + +| Term | Definition | +|---|---| +| `STOPPED_VM` | Normalized stopped lifecycle state: exact raw `"TERMINATED"` or exact raw `"STOPPED"` | +| active MIG membership | The instance is currently controlled by a managed instance group, not merely historically created by or once attached to one | + +--- + +## 3. GCP Documentation Grounding + +### 3.1 Instance resource exposes canonical lifecycle and stop/start timestamps + +Google documents the Compute Engine `Instance` resource with fields including: + +1. `status` +2. `creationTimestamp` +3. `zone` +4. `machineType` +5. `disks` +6. `labels` +7. `lastStartTimestamp` +8. `lastStopTimestamp` +9. `lastSuspendedTimestamp` +10. `scheduling` + +Source: + +- *Resource: instances* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/instances + +Rule consequence: + +1. VM eligibility must be based on documented instance control-plane fields. +2. `lastStopTimestamp` is the canonical stop-age signal for this rule. +3. `lastStartTimestamp`, disk attachment shape, and scheduling fields are context only. + +### 3.2 Compute Engine lifecycle defines the exact stopped state + +Google documents instance lifecycle states including: + +- `PROVISIONING` +- `STAGING` +- `RUNNING` +- `STOPPING` +- `TERMINATED` +- `REPAIRING` +- `SUSPENDING` +- `SUSPENDED` + +Google also documents that `TERMINATED` means Compute Engine has completed the stop operation and the attached resources remain attached unless detached. + +Google's lifecycle documentation also distinguishes between UI wording and API wording: when you stop a VM, the Google Cloud console shows the instance as **stopped**, while the Compute Engine API reports the same stopped lifecycle state as `TERMINATED`. 
+ +Sources: + +- *Compute Engine instance lifecycle* +- *Resource: instances* + +URLs: + +- https://docs.cloud.google.com/compute/docs/instances/instance-lifecycle +- https://cloud.google.com/compute/docs/reference/rest/v1/instances + +Rule consequence: + +1. The rule uses the canonical definition of `STOPPED_VM`. +2. Transitional states such as `STOPPING` and `SUSPENDING` must skip. +3. Suspended-state resources are out of scope for this rule because suspend has different billing semantics. + +### 3.3 Billing semantics differ for stopped versus suspended instances + +Google documents that: + +1. CPU usage is billed while an instance is `RUNNING` or `PENDING_STOP` +2. memory usage is billed while an instance is `RUNNING`, `PENDING_STOP`, `SUSPENDING`, or `SUSPENDED` +3. attached resources such as disks or external IP addresses are billed until the resources no longer exist, regardless of instance state + +Google also documents that while an instance is in `STOPPING` or `TERMINATED`, you do not incur CPU charges, but attached resources remain billable. + +Sources: + +- *Compute Engine instance lifecycle* +- *Suspend, stop, or reset Compute Engine instances* + +URLs: + +- https://docs.cloud.google.com/compute/docs/instances/instance-lifecycle +- https://docs.cloud.google.com/compute/docs/instances/suspend-stop-reset-instances-overview + +Rule consequence: + +1. This rule is about **stopped instances with still-billable attached resources**, not about billed CPU/runtime. +2. `SUSPENDED` must stay out of scope because suspended instances still have memory-storage billing semantics not shared with stopped instances. +3. The rule must not estimate total cost from VM runtime pricing alone. + +### 3.4 Stopped instances retain attached resources and some identifiers + +Google documents that when you stop an instance: + +1. attached disks are maintained +2. internal IP and MAC addresses are maintained +3. static external IP addresses are maintained +4. 
ephemeral external IP addresses are released + +Source: + +- *Suspend, stop, or reset Compute Engine instances* + +URL: + +- https://docs.cloud.google.com/compute/docs/instances/suspend-stop-reset-instances-overview + +Rule consequence: + +1. Disk and static-IP context can be relevant to cleanup review. +2. The rule must not assume every stopped instance still has an external IP cost. +3. Attached-resource counts and shapes are valid evidence/details context, but not enough for a trustworthy universal cost estimate. + +### 3.5 Managed instance groups are operationally different + +Google documents that managed instance groups (MIGs) manage VM lifecycle and can resize, recreate, autoheal, and repair instances. Google also documents VM detail surfaces that show whether a VM is part of an instance group, and managed-instance-group info surfaces identify the VM instances that belong to each MIG. + +Sources: + +- *View the details of a VM* +- *View info about MIGs and managed instances* + +URLs: + +- https://docs.cloud.google.com/compute/docs/instances/view-vm-details +- https://docs.cloud.google.com/compute/docs/instance-groups/getting-info-about-migs + +Rule consequence: + +1. If the implementation can prove that a VM has active MIG membership, the rule should skip it. +2. Managed instances should not be treated the same as standalone stopped VMs because the group can intentionally recreate, repair, or replace them. +3. Proof of active MIG membership may come only from first-party instance-group or instance-metadata signals exposed by Google Cloud surfaces. +4. Example proof signals include instance metadata such as `created-by` referencing `instanceGroupManagers/...`, or direct managed-instance-group membership surfaces. +5. The rule must not guess from names, labels, or other weak heuristics. 
+ +### 3.6 Aggregated instance inventory supports partial success + +Google documents that `instances.aggregatedList` retrieves instances across all regions and zones and recommends setting `returnPartialSuccess=true` to prevent total failure on large or partially failing projects. + +Google also documents that partial success can return scope-level warnings rather than a fully complete inventory. + +Source: + +- *Method: instances.aggregatedList* + +URL: + +- https://cloud.google.com/compute/docs/reference/rest/v1/instances/aggregatedList + +Rule consequence: + +1. Aggregated inventory should opt in to partial success. +2. Partial coverage must be surfaced and must not be treated as a clean project. + +### 3.7 Pricing is too variable for a fixed stopped-VM cost estimate + +Google documents that: + +1. VM runtime pricing is separate from disk, image, networking, sole tenancy, and GPU pricing +2. attached resources keep billing according to their own pricing surfaces +3. static external IP pricing is documented separately from VM runtime pricing + +Sources: + +- *Compute Engine VM instance pricing* +- *All networking pricing* + +URLs: + +- https://cloud.google.com/compute/vm-instance-pricing +- https://cloud.google.com/vpc/network-pricing#ipaddress + +Rule consequence: + +1. The rule must not hardcode a flat per-GB stopped-VM estimate. +2. `estimated_monthly_cost_usd` should remain `None` unless a future implementation computes pricing from authoritative current resource-specific pricing inputs. + +--- + +## 4. Detection Goal + +Emit a finding only when **all** of the following are true: + +1. `instance.name` is present and non-empty +2. the aggregated scope key resolves to an exact zone +3. if a region filter is set, the normalized region is parseable and matches it +4. the instance is not proven to have active MIG membership +5. the normalized lifecycle state resolves to `STOPPED_VM` +6. `lastStopTimestamp` is present and parseable +7. 
stop age is greater than or equal to `max_age_days` + +If any required signal cannot be established reliably, skip rather than emit. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that the VM is abandoned +- that the VM should be deleted +- that attached disks or static IPs are safe to remove +- that the VM is not intentionally kept for forensics, rollback, or future restart +- that a specific monthly saving exists + +--- + +## 6. Canonical Inputs + +### 6.1 Required surface + +| Surface | Purpose | +|---|---| +| `instances.aggregatedList` | enumerate VM instances across zones with lifecycle, timestamps, disks, labels, machine type, and scheduling context | + +### 6.2 Threshold + +| Parameter | Meaning | +|---|---| +| `max_age_days` | Review threshold in days; default `30` | + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `status` | Resolve the raw lifecycle state from the source surface. Normalize it using the canonical `STOPPED_VM` definition. Otherwise preserve the raw state. | +| `zone_scope` | Resolve only from exact aggregated scope keys in the form `zones/ZONE`. Any other scope key is unusable. | +| `zone` | Extract exact zone name from the resolved `zone_scope`. | +| `region` | Derive from the resolved zone when parseable. If region derivation is not parseable from the zone string, preserve `"unknown"` rather than skipping evaluation. | +| `last_stop_timestamp` | Parse from documented RFC3339 `lastStopTimestamp`. Unparseable values are unusable. | +| `last_start_timestamp` | Preserve exact documented RFC3339 value when present; context only. | +| `machine_type` | Preserve the final machine type segment when the URL/path is parseable; otherwise unknown. 
| +| `mig_membership` | Preserve only if the implementation can prove active MIG membership from the allowed first-party proof sources in this spec: direct managed-instance-group membership surfaces, or current instance metadata such as `created-by` referencing `instanceGroupManagers/...`. No guessing. | +| `persistent_disk_count` | Count only attached disks where `type == "PERSISTENT"`. | +| `persistent_disk_total_gb` | Sum attached persistent `diskSizeGb` values as non-negative integers when parseable; otherwise preserve unknown/`0` for context only. | +| `disk_kinds_present` | Preserve, on a best-effort basis from instance-attached disk metadata, the distinct attached-disk kinds exposed on the instance surface, such as `PERSISTENT` and `SCRATCH`, as context only. | +| `boot_disk_count` | Count attached disks where documented boot flag is true. | +| `external_nat_ip_present` | True when any network interface access config exposes `natIP`; context only. | +| `gpu_attached` | True when `guestAccelerators` contains one or more accelerator attachments; context only. | +| `labels` | `instance.labels or {}` - never `None` in output. | +| `automatic_restart` | Preserve exact `scheduling.automaticRestart` when present; context only. | + +--- + +## 8. 
Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | instance record malformed or `name` absent / empty | Skip | +| 8.2 | aggregated scope key does not resolve to exact `zones/ZONE` | Skip | +| 8.3 | region filter set and normalized region is unknown or does not exactly match | Skip and surface `region_unparseable` only in `signals_not_checked` or `skip_reason` when region is unknown | +| 8.4 | instance is proven to have active MIG membership | Skip | +| 8.5 | normalized lifecycle state absent, unknown, or not `STOPPED_VM` | Skip | +| 8.6 | `lastStopTimestamp` absent or unparsable | Skip and surface `missing_last_stop_timestamp` only in `signals_not_checked` or `skip_reason` when such diagnostics exist | +| 8.7 | stop age `< max_age_days` | Skip | +| 8.8 | all required signals resolve and stop age `>= max_age_days` | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Inventory contract + +Required behavior: + +1. Enumerate instances from `instances.aggregatedList`. +2. Use `returnPartialSuccess=true`. +3. Fully iterate all pages. +4. Surface any partial-coverage warnings. +5. Partial inventory coverage must not be treated as a clean project. + +### 9.2 Scope / zone contract + +Required behavior: + +1. Accept only aggregated scope keys in exact `zones/ZONE` form. +2. Extract the zone name from the scope key only when the key resolves unambiguously. +3. Derive region from the zone when parseable. +4. If region derivation is not parseable from the zone string, preserve `"unknown"` rather than guessing. +5. If region derivation is not parseable and diagnostics exist, use the literal code `region_unparseable` only in `signals_not_checked` or `skip_reason`. +6. If a region filter is set and normalized region is `"unknown"`, skip because the filter cannot be evaluated reliably. +7. If the scope key itself is malformed, skip rather than guess. 
+ +Rationale: + +Preserving `"unknown"` is safer than guessing a region from a future or non-standard zone format. `region_unparseable` is a diagnostic-only signal and must never appear as emitted finding evidence. + +### 9.3 Lifecycle-state contract + +Required behavior: + +1. Normalize the stopped lifecycle state to internal `STOPPED_VM`. +2. Apply the canonical `STOPPED_VM` definition consistently across supported source surfaces. +3. Transitional or non-stopped states such as `PROVISIONING`, `STAGING`, `RUNNING`, `STOPPING`, `REPAIRING`, `SUSPENDING`, and `SUSPENDED` must skip. +4. Unknown or unresolved status must skip. + +Rationale: + +Google documents `TERMINATED` as the API state after the stop operation is complete, while many user-facing or tooling surfaces describe that same condition as stopped. Transitional and suspended states have different operational and billing semantics. + +### 9.4 MIG exclusion contract + +Required behavior: + +1. Skip only when the implementation can prove that a VM has active MIG membership. +2. Accept proof only from the allowed first-party proof sources in this spec. +3. Allowed proof sources are limited to: + a. direct managed-instance-group membership surfaces + b. current instance metadata such as `created-by` referencing `instanceGroupManagers/...` +4. Do not infer MIG membership from weak heuristics such as naming patterns, labels, vague metadata, or historical-only hints. +5. If MIG membership cannot be established reliably from the allowed proof sources, continue with normal evaluation rather than guessing. + +Rationale: + +Managed instance groups intentionally manage VM lifecycle, including replacement, repair, resizing, and recreation. A stopped VM inside a MIG is not equivalent to an independently owned standalone VM. + +### 9.5 Stop-age contract + +Required behavior: + +1. Parse `lastStopTimestamp` as RFC3339. +2. Compute `stop_age_days` as whole UTC days between `now` and parsed `last_stop_timestamp`. +3. 
Emit only when `stop_age_days >= max_age_days`. +4. If `lastStopTimestamp` is absent or unparsable, skip rather than guess. +5. Do **not** substitute `creationTimestamp`, `lastStartTimestamp`, or other lifecycle fields for stop age. +6. When skip diagnostics or debug signals exist, use the literal code `missing_last_stop_timestamp` for this blind spot. +7. `missing_last_stop_timestamp` belongs only in `signals_not_checked` or `skip_reason`; it must never appear as evidence for an emitted finding. + +Rationale: + +`lastStopTimestamp` is the canonical control-plane stop-age signal. Some older or otherwise atypical VMs might not expose a usable stop timestamp; this rule intentionally skips those instances rather than backfilling age from weaker signals, and should surface `missing_last_stop_timestamp` only in `signals_not_checked` or `skip_reason` when such diagnostics exist. A future extension could use audit-log evidence, but that is out of scope for this rule. + +### 9.6 Cost model contract + +Required behavior: + +1. `estimated_monthly_cost_usd = None` +2. Do **not** estimate cost from attached disk size alone. +3. Do **not** hardcode a flat pd-standard rate. +4. Persistent disk counts/sizes, attached-disk kinds, external NAT IP presence, boot-disk presence, and GPU attachment may appear as context only. + +Rationale: + +Google documents that attached resources continue billing, but the specific pricing depends on the actual resource mix and its own pricing surface. A fixed pd-standard estimate is not a trustworthy canonical result. + +### 9.7 Confidence contract + +Required behavior: + +| Condition | Confidence | +|---|---| +| `max_age_days <= stop_age_days < 90` | `MEDIUM` | +| `stop_age_days >= 90` | `HIGH` | + +Rationale: + +Stop age is the primary confidence driver. Confidence may be nudged upward within this age-led model when the stopped VM has no external NAT IP and no GPU attachment, because those traits reduce obvious restart dependencies. 
Large persistent-disk footprint can strengthen cleanup-review priority and may modestly strengthen confidence only in combination with the rest of the stopped-resource picture, but it must not override the age-led precision model by itself. +These nudges do not override the age-based confidence tiers. + +### 9.8 Risk contract + +Required behavior: + +| Condition | Risk | +|---|---| +| Finding emitted | `MEDIUM` | + +Rationale: + +Stopped VMs often still anchor important attached resources and can be intentionally retained for rollback, forensics, or later restart. + +### 9.9 Failure behavior contract + +Required behavior: + +1. `compute.instances.list` permission failures should surface as a permission error. +2. If the Compute Engine API is unavailable / disabled for the project, returning no findings is acceptable. +3. Malformed instance records should be skipped item-by-item rather than failing the whole rule. + +--- + +## 10. Finding Shape + +### 10.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"gcp"` | +| `rule_id` | `"gcp.compute.vm.stopped"` | +| `resource_type` | `"gcp.compute.instance"` | +| `resource_id` | canonical project/zone/instance path | +| `region` | derived region from zone, or `"unknown"` when region derivation is not parseable | +| `risk` | `MEDIUM` | +| `estimated_monthly_cost_usd` | `None` | + +### 10.2 Required evidence + +`signals_used` must clearly disclose: + +1. instance is in `STOPPED_VM` state, including the raw lifecycle state when useful +2. stop age in days +3. threshold in days +4. persistent disk count and total size as context only +5. machine type when present +6. boot-disk presence when present +7. `automaticRestart` context when present +8. attached-disk kinds when present +9. external NAT IP presence when present +10. 
GPU attachment flag when present + +Diagnostic-only codes such as `missing_last_stop_timestamp` and `region_unparseable` must never appear in `signals_used`; they belong only in `signals_not_checked` or `skip_reason`. + +`signals_not_checked` should include remaining blind spots such as: + +1. planned seasonal or scheduled shutdown intent +2. rollback, forensics, or future restart intent +3. exact resource-specific monthly pricing for disks and IPs was not estimated +4. static external IP usage and billing state were not fully resolved +5. `missing_last_stop_timestamp` for older or atypical VMs that are intentionally skipped when no usable stop timestamp exists +6. `region_unparseable` when region derivation from zone is not parseable + +### 10.3 Required details + +Details should include at least: + +- `instance_name` +- `machine_type` +- `zone` +- `raw_status` +- `stop_age_days` +- `max_age_days_threshold` +- `last_stop_timestamp` +- `mig_membership` +- `persistent_disk_count` +- `persistent_disk_total_gb` +- `disk_kinds_present` +- `boot_disk_count` +- `external_nat_ip_present` +- `gpu_attached` +- `labels` + +When present, details should also include: + +- `last_start_timestamp` +- `automatic_restart` + +--- + +## 11. Failure Behavior + +- Instance list permission denied -> raise permission error +- Compute Engine API disabled / not found -> return no findings +- Partial aggregated coverage -> surface warning / incomplete coverage signal +- Malformed scope key or instance record -> skip that item diff --git a/pyproject.toml b/pyproject.toml index 0b9d670..6d85d6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "cleancloud" -version = "1.27.0" +version = "1.28.0" description = "Read-only cloud hygiene for AWS, Azure, and GCP. Multi-account org scanning, CI/CD enforcement, and deterministic cost modeling. No agents, no telemetry." 
readme = "README.md" requires-python = ">=3.10" diff --git a/tests/cleancloud/providers/azure/test_azure_ai_search_idle.py b/tests/cleancloud/providers/azure/ai/test_azure_ai_search_idle.py similarity index 100% rename from tests/cleancloud/providers/azure/test_azure_ai_search_idle.py rename to tests/cleancloud/providers/azure/ai/test_azure_ai_search_idle.py diff --git a/tests/cleancloud/providers/azure/test_azure_aml_compute_idle.py b/tests/cleancloud/providers/azure/ai/test_azure_aml_compute_idle.py similarity index 100% rename from tests/cleancloud/providers/azure/test_azure_aml_compute_idle.py rename to tests/cleancloud/providers/azure/ai/test_azure_aml_compute_idle.py diff --git a/tests/cleancloud/providers/azure/test_azure_aml_compute_instance_idle.py b/tests/cleancloud/providers/azure/ai/test_azure_aml_compute_instance_idle.py similarity index 100% rename from tests/cleancloud/providers/azure/test_azure_aml_compute_instance_idle.py rename to tests/cleancloud/providers/azure/ai/test_azure_aml_compute_instance_idle.py diff --git a/tests/cleancloud/providers/azure/test_azure_ml_online_endpoint_idle.py b/tests/cleancloud/providers/azure/ai/test_azure_ml_online_endpoint_idle.py similarity index 100% rename from tests/cleancloud/providers/azure/test_azure_ml_online_endpoint_idle.py rename to tests/cleancloud/providers/azure/ai/test_azure_ml_online_endpoint_idle.py diff --git a/tests/cleancloud/providers/azure/test_azure_openai_provisioned_idle.py b/tests/cleancloud/providers/azure/ai/test_azure_openai_provisioned_idle.py similarity index 100% rename from tests/cleancloud/providers/azure/test_azure_openai_provisioned_idle.py rename to tests/cleancloud/providers/azure/ai/test_azure_openai_provisioned_idle.py diff --git a/tests/cleancloud/providers/gcp/test_gcp_featurestore_idle.py b/tests/cleancloud/providers/gcp/ai/test_gcp_featurestore_idle.py similarity index 98% rename from tests/cleancloud/providers/gcp/test_gcp_featurestore_idle.py rename to 
tests/cleancloud/providers/gcp/ai/test_gcp_featurestore_idle.py index 340ce66..004603b 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_featurestore_idle.py +++ b/tests/cleancloud/providers/gcp/ai/test_gcp_featurestore_idle.py @@ -26,7 +26,7 @@ from cleancloud.core.confidence import ConfidenceLevel from cleancloud.core.risk import RiskLevel -from cleancloud.providers.gcp.rules.featurestore_idle import ( +from cleancloud.providers.gcp.rules.ai.featurestore_idle import ( _BIGTABLE_NODE_HOURLY_COST, _DEFAULT_IDLE_DAYS, _HOURS_PER_MONTH, @@ -161,14 +161,14 @@ def _monitoring_side_effect(request=None, **kwargs): with ( patch( - "cleancloud.providers.gcp.rules.featurestore_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.featurestore_idle.AuthorizedSession", return_value=mock_session, ), patch( - "cleancloud.providers.gcp.rules.featurestore_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.featurestore_idle.monitoring_v3.MetricServiceClient", return_value=mock_monitoring, ), - patch("cleancloud.providers.gcp.rules.featurestore_idle.datetime") as mock_dt, + patch("cleancloud.providers.gcp.rules.ai.featurestore_idle.datetime") as mock_dt, ): mock_dt.now.return_value = NOW mock_dt.fromisoformat = datetime.fromisoformat diff --git a/tests/cleancloud/providers/gcp/test_gcp_tpu_idle.py b/tests/cleancloud/providers/gcp/ai/test_gcp_tpu_idle.py similarity index 96% rename from tests/cleancloud/providers/gcp/test_gcp_tpu_idle.py rename to tests/cleancloud/providers/gcp/ai/test_gcp_tpu_idle.py index 876526b..10f7aba 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_tpu_idle.py +++ b/tests/cleancloud/providers/gcp/ai/test_gcp_tpu_idle.py @@ -26,7 +26,7 @@ from cleancloud.core.confidence import ConfidenceLevel from cleancloud.core.risk import RiskLevel -from cleancloud.providers.gcp.rules.tpu_idle import ( +from cleancloud.providers.gcp.rules.ai.tpu_idle import ( _CHIP_HOURLY_COST, _DEFAULT_IDLE_DAYS, _DUTY_CYCLE_IDLE_THRESHOLD, @@ 
-129,14 +129,14 @@ def _run( with ( patch( - "cleancloud.providers.gcp.rules.tpu_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.tpu_idle.AuthorizedSession", return_value=mock_session_inst, ), patch( - "cleancloud.providers.gcp.rules.tpu_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.tpu_idle.monitoring_v3.MetricServiceClient", return_value=mock_monitoring_inst, ), - patch("cleancloud.providers.gcp.rules.tpu_idle.datetime") as mock_dt, + patch("cleancloud.providers.gcp.rules.ai.tpu_idle.datetime") as mock_dt, ): mock_dt.now.return_value = NOW mock_dt.fromisoformat = datetime.fromisoformat @@ -286,10 +286,10 @@ def test_permission_error_on_403(self): with ( patch( - "cleancloud.providers.gcp.rules.tpu_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.tpu_idle.AuthorizedSession", return_value=mock_session, ), - patch("cleancloud.providers.gcp.rules.tpu_idle.monitoring_v3.MetricServiceClient"), + patch("cleancloud.providers.gcp.rules.ai.tpu_idle.monitoring_v3.MetricServiceClient"), ): with pytest.raises(PermissionError, match="tpu.nodes.list"): find_idle_tpu_nodes(project_id=_PROJECT, credentials=MagicMock()) @@ -303,10 +303,10 @@ def test_tpu_api_not_enabled_returns_empty(self): with ( patch( - "cleancloud.providers.gcp.rules.tpu_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.tpu_idle.AuthorizedSession", return_value=mock_session, ), - patch("cleancloud.providers.gcp.rules.tpu_idle.monitoring_v3.MetricServiceClient"), + patch("cleancloud.providers.gcp.rules.ai.tpu_idle.monitoring_v3.MetricServiceClient"), ): findings = find_idle_tpu_nodes(project_id=_PROJECT, credentials=MagicMock()) assert findings == [] diff --git a/tests/cleancloud/providers/gcp/test_gcp_vertex_endpoint_idle.py b/tests/cleancloud/providers/gcp/ai/test_gcp_vertex_endpoint_idle.py similarity index 94% rename from tests/cleancloud/providers/gcp/test_gcp_vertex_endpoint_idle.py rename to 
tests/cleancloud/providers/gcp/ai/test_gcp_vertex_endpoint_idle.py index d81e601..23f74d6 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_vertex_endpoint_idle.py +++ b/tests/cleancloud/providers/gcp/ai/test_gcp_vertex_endpoint_idle.py @@ -22,7 +22,7 @@ import pytest -from cleancloud.providers.gcp.rules.vertex_endpoint_idle import ( +from cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle import ( _DAYS_IDLE, _DEFAULT_MACHINE_MONTHLY_COST, _GPU_MONTHLY_COST_EACH, @@ -152,19 +152,19 @@ def _run( with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle._list_endpoints", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle._list_endpoints", return_value=endpoints, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", return_value=monitoring_client, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.datetime", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.datetime", ) as mock_dt, ): mock_dt.now.return_value = NOW @@ -726,19 +726,19 @@ def test_batch_monitoring_single_call_per_location(): with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle._list_endpoints", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle._list_endpoints", return_value=[ep1, ep2], ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", return_value=monitoring_client, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), patch( - 
"cleancloud.providers.gcp.rules.vertex_endpoint_idle.datetime", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.datetime", ) as mock_dt, ): mock_dt.now.return_value = NOW @@ -764,19 +764,19 @@ def test_batch_monitoring_separate_call_per_location(): with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle._list_endpoints", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle._list_endpoints", return_value=[ep1, ep2], ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", return_value=monitoring_client, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.datetime", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.datetime", ) as mock_dt, ): mock_dt.now.return_value = NOW @@ -842,15 +842,15 @@ def test_pagination_fetches_all_endpoints(): with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", return_value=monitoring_client, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.datetime", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.datetime", ) as mock_dt, ): mock_dt.now.return_value = NOW @@ -880,10 +880,10 @@ def test_403_raises_permission_error(): with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", 
), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), ): @@ -904,10 +904,10 @@ def test_404_returns_empty(): with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), ): @@ -987,19 +987,19 @@ def test_eligible_endpoint_ids_guard_filters_stale_series(): with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle._list_endpoints", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle._list_endpoints", return_value=[ep], ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", return_value=monitoring_client, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.datetime", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.datetime", ) as mock_dt, ): mock_dt.now.return_value = NOW @@ -1135,7 +1135,7 @@ def test_min_cost_constant_is_reasonable(): """_MIN_MONTHLY_COST_USD is set and below the cheapest known machine type.""" assert _MIN_MONTHLY_COST_USD > 0 # All known machine types cost more than the filter threshold - from cleancloud.providers.gcp.rules.vertex_endpoint_idle import ( + from cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle import ( _MACHINE_MONTHLY_COST, ) @@ -1257,19 +1257,19 @@ def _run_with_monitoring_client(endpoints, 
monitoring_client, region_filter=None mock_credentials = MagicMock() with ( patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle._list_endpoints", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle._list_endpoints", return_value=endpoints, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.monitoring_v3.MetricServiceClient", return_value=monitoring_client, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.AuthorizedSession", return_value=mock_session, ), patch( - "cleancloud.providers.gcp.rules.vertex_endpoint_idle.datetime", + "cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle.datetime", **{"now.return_value": NOW, "fromisoformat": datetime.fromisoformat}, ), ): diff --git a/tests/cleancloud/providers/gcp/test_gcp_vertex_training_job_long_running.py b/tests/cleancloud/providers/gcp/ai/test_gcp_vertex_training_job_long_running.py similarity index 98% rename from tests/cleancloud/providers/gcp/test_gcp_vertex_training_job_long_running.py rename to tests/cleancloud/providers/gcp/ai/test_gcp_vertex_training_job_long_running.py index 2664645..0935d11 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_vertex_training_job_long_running.py +++ b/tests/cleancloud/providers/gcp/ai/test_gcp_vertex_training_job_long_running.py @@ -31,7 +31,7 @@ from cleancloud.core.confidence import ConfidenceLevel from cleancloud.core.risk import RiskLevel -from cleancloud.providers.gcp.rules.vertex_training_job_long_running import ( +from cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running import ( _BUNDLED_ACCELERATOR_COUNT, _DEFAULT_LONG_RUNNING_HOURS, _DEFAULT_MACHINE_MONTHLY_COST, @@ -142,11 +142,11 @@ def _run( creds = MagicMock() session = _make_session(custom_jobs=custom_jobs, training_pipelines=training_pipelines) with patch( - 
"cleancloud.providers.gcp.rules.vertex_training_job_long_running.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running.AuthorizedSession", return_value=session, ): with patch( - "cleancloud.providers.gcp.rules.vertex_training_job_long_running.datetime" + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running.datetime" ) as mock_dt: mock_dt.now.return_value = NOW mock_dt.fromisoformat.side_effect = datetime.fromisoformat @@ -573,7 +573,7 @@ def test_permission_error_raises(): creds = MagicMock() session = _make_session(status=403) with patch( - "cleancloud.providers.gcp.rules.vertex_training_job_long_running.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running.AuthorizedSession", return_value=session, ): with pytest.raises(PermissionError): @@ -587,7 +587,7 @@ def test_partial_failure_warns_and_returns_partial_findings(): # Patch _list_jobs so customJobs succeeds but trainingPipelines raises original_list_jobs = __import__( - "cleancloud.providers.gcp.rules.vertex_training_job_long_running", + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running", fromlist=["_list_jobs"], )._list_jobs @@ -601,11 +601,11 @@ def _patched_list_jobs(session, project_id, resource, state_filter): creds = MagicMock() with patch( - "cleancloud.providers.gcp.rules.vertex_training_job_long_running.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running.AuthorizedSession", return_value=good_session, ): with patch( - "cleancloud.providers.gcp.rules.vertex_training_job_long_running._list_jobs", + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running._list_jobs", side_effect=_patched_list_jobs, ): import warnings as _warnings @@ -1087,7 +1087,7 @@ def test_skipped_jobs_warning_on_missing_timestamp(): mock_resp.json.return_value = {"customJobs": [job], "trainingPipelines": []} with patch( - 
"cleancloud.providers.gcp.rules.vertex_training_job_long_running.AuthorizedSession" + "cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running.AuthorizedSession" ) as mock_session_cls: mock_session = MagicMock() mock_session.get.return_value = mock_resp diff --git a/tests/cleancloud/providers/gcp/test_gcp_workbench_idle.py b/tests/cleancloud/providers/gcp/ai/test_gcp_workbench_idle.py similarity index 96% rename from tests/cleancloud/providers/gcp/test_gcp_workbench_idle.py rename to tests/cleancloud/providers/gcp/ai/test_gcp_workbench_idle.py index 1d96d9f..3bf24ba 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_workbench_idle.py +++ b/tests/cleancloud/providers/gcp/ai/test_gcp_workbench_idle.py @@ -22,7 +22,7 @@ from cleancloud.core.confidence import ConfidenceLevel from cleancloud.core.risk import RiskLevel -from cleancloud.providers.gcp.rules.workbench_idle import ( +from cleancloud.providers.gcp.rules.ai.workbench_idle import ( _DEFAULT_MACHINE_MONTHLY_COST, _GPU_MONTHLY_COST_EACH, _MACHINE_MONTHLY_COST, @@ -195,10 +195,10 @@ def test_none_machine_type_uses_default(self): class TestFindIdleWorkbenchInstances: def _run(self, instances: list, **kwargs): with patch( - "cleancloud.providers.gcp.rules.workbench_idle._list_instances", + "cleancloud.providers.gcp.rules.ai.workbench_idle._list_instances", return_value=instances, ): - with patch("cleancloud.providers.gcp.rules.workbench_idle.datetime") as mock_dt: + with patch("cleancloud.providers.gcp.rules.ai.workbench_idle.datetime") as mock_dt: mock_dt.now.return_value = NOW mock_dt.fromisoformat = datetime.fromisoformat return find_idle_workbench_instances( @@ -364,7 +364,7 @@ def test_500_from_v2_does_not_abort_scan(self): mock_session.get.return_value = resp_500 with patch( - "cleancloud.providers.gcp.rules.workbench_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.workbench_idle.AuthorizedSession", return_value=mock_session, ): findings = 
find_idle_workbench_instances(project_id=_PROJECT, credentials=MagicMock()) @@ -384,7 +384,7 @@ def test_403_raises_permission_error(self): mock_session.get.return_value = response with patch( - "cleancloud.providers.gcp.rules.workbench_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.workbench_idle.AuthorizedSession", return_value=mock_session, ): with pytest.raises(PermissionError, match="notebooks.instances.list"): @@ -397,7 +397,7 @@ def test_404_returns_empty(self): mock_session.get.return_value = response with patch( - "cleancloud.providers.gcp.rules.workbench_idle.AuthorizedSession", + "cleancloud.providers.gcp.rules.ai.workbench_idle.AuthorizedSession", return_value=mock_session, ): findings = find_idle_workbench_instances(project_id=_PROJECT, credentials=MagicMock()) diff --git a/tests/cleancloud/providers/gcp/test_gcp_disk_unattached.py b/tests/cleancloud/providers/gcp/test_gcp_disk_unattached.py index daf268a..da24178 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_disk_unattached.py +++ b/tests/cleancloud/providers/gcp/test_gcp_disk_unattached.py @@ -93,28 +93,16 @@ def test_non_ready_disk_not_flagged(monkeypatch): assert findings == [] -def test_cost_calculation_pd_ssd(monkeypatch): - """Cost should use the per-type rate: pd-ssd @ $0.17/GB/month.""" +def test_estimated_monthly_cost_is_none(monkeypatch): + """spec 9.5.1: estimated_monthly_cost_usd must always be None (pricing varies by region/currency).""" _mock_client( - {"zones/us-central1-a": [_make_disk("ssd-disk", disk_type="pd-ssd", size_gb=200)]}, + {"zones/us-central1-a": [_make_disk("std-disk", disk_type="pd-ssd", size_gb=500)]}, monkeypatch, ) findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) assert len(findings) == 1 - assert findings[0].estimated_monthly_cost_usd == round(200 * 0.17, 2) - - -def test_cost_calculation_pd_standard(monkeypatch): - """pd-standard disks should use $0.04/GB/month.""" - _mock_client( - {"zones/us-central1-a": 
[_make_disk("std-disk", disk_type="pd-standard", size_gb=500)]}, - monkeypatch, - ) - findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) - - assert len(findings) == 1 - assert findings[0].estimated_monthly_cost_usd == round(500 * 0.04, 2) + assert findings[0].estimated_monthly_cost_usd is None def test_region_filter_excludes_other_zones(monkeypatch): @@ -345,8 +333,8 @@ def test_no_last_detach_timestamp_not_in_details(monkeypatch): # --------------------------------------------------------------------------- -def test_hyperdisk_cost_note_in_signals_not_checked(monkeypatch): - """Hyperdisk findings should warn that IOPS/throughput charges are not included.""" +def test_hyperdisk_iops_note_in_signals_not_checked(monkeypatch): + """Hyperdisk findings should note that IOPS/throughput charges are billed separately.""" _mock_client( { "zones/us-central1-a": [ @@ -362,19 +350,259 @@ def test_hyperdisk_cost_note_in_signals_not_checked(monkeypatch): assert any("IOPS" in s for s in not_checked) -def test_hyperdisk_uses_conservative_cost_rate(monkeypatch): - """Hyperdisk cost estimate should use the pd-standard floor rate ($0.04/GB).""" +# --------------------------------------------------------------------------- +# Spec 7: disk_type normalization +# --------------------------------------------------------------------------- + + +def test_disk_type_fallback_is_unknown(monkeypatch): + """spec 7: when disk type URL is absent, disk_type should be 'unknown', not a guessed default.""" + disk = _make_disk("no-type-disk") + disk.type_ = "" # empty URL + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", SimpleNamespace(disks=[disk]))] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["disk_type"] == 
"unknown" + + +# --------------------------------------------------------------------------- +# Spec 8.1: malformed disk name +# --------------------------------------------------------------------------- + + +def test_absent_disk_name_is_skipped(monkeypatch): + """spec 8.1: disk records with absent/empty name must be skipped.""" + disk = _make_disk("placeholder") + disk.name = "" # absent name + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", SimpleNamespace(disks=[disk]))] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Spec 8.2: malformed scope key +# --------------------------------------------------------------------------- + + +def test_malformed_scope_key_is_skipped(monkeypatch): + """spec 8.2: scope keys without a '/' (e.g. 
'global') must be skipped, not crash.""" + disk = _make_disk("disk-global") + mock = MagicMock() + mock.aggregated_list.return_value = [ + ("global", SimpleNamespace(disks=[disk])) # no slash — len == 1, != 2 + ] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_extra_segment_scope_key_is_skipped(monkeypatch): + """spec 8.2: only exactly 'zones/ZONE' or 'regions/REGION' are supported; + a key like 'zones/us-central1-a/extra' has 3 segments and must be skipped.""" + disk = _make_disk("disk-extra") + mock = MagicMock() + mock.aggregated_list.return_value = [ + ("zones/us-central1-a/extra", SimpleNamespace(disks=[disk])) # len == 3, != 2 + ] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Spec 8.5: unresolvable users field +# --------------------------------------------------------------------------- + + +def test_none_users_is_skipped(monkeypatch): + """spec 8.5: disk.users=None is not equivalent to an empty list — must skip.""" + disk = _make_disk("null-users-disk") + disk.users = None # bypass the _make_disk default + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", SimpleNamespace(disks=[disk]))] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Spec 9.5 / 10.2: cost blind-spot disclosure +# 
--------------------------------------------------------------------------- + + +def test_exact_pricing_blind_spot_in_signals_not_checked(monkeypatch): + """spec 9.5/10.2: signals_not_checked must disclose that exact pricing is unavailable.""" _mock_client( - { - "zones/us-central1-a": [ - _make_disk("hd-disk", disk_type="hyperdisk-extreme", size_gb=100) - ] - }, + {"zones/us-central1-a": [_make_disk("billing-disk", disk_type="pd-ssd", size_gb=100)]}, monkeypatch, ) findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) - assert findings[0].estimated_monthly_cost_usd == round(100 * 0.04, 2) + assert len(findings) == 1 + not_checked = findings[0].evidence.signals_not_checked + assert any( + "cost" in s.lower() or "pricing" in s.lower() or "billing" in s.lower() for s in not_checked + ) + + +# --------------------------------------------------------------------------- +# Issue 1: Partial aggregated coverage (spec 9.1.8-9 / 9.6) +# --------------------------------------------------------------------------- + + +def test_partial_coverage_warning_is_emitted(monkeypatch): + """spec 9.1.8-9: a scope with a warning code must emit a UserWarning, not silently pass.""" + disk = _make_disk("disk-partial") + scope_with_warning = SimpleNamespace( + disks=[disk], + warning=SimpleNamespace(code="NO_RESULTS_ON_PAGE", message="partial"), + ) + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", scope_with_warning)] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + with pytest.warns(UserWarning, match="partial coverage"): + find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + + +def test_scope_without_warning_attr_does_not_warn(monkeypatch): + """No warning attribute on scope_disks must not raise and must not warn.""" + # _make_scoped_disk_list returns a SimpleNamespace with no 'warning' attribute + 
_mock_client({"zones/us-central1-a": [_make_disk("quiet-disk")]}, monkeypatch) + import warnings as _w + + with _w.catch_warnings(): + _w.simplefilter("error") # any warning → error + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + + +# --------------------------------------------------------------------------- +# Issue 2: users[] strict list check (spec 8.5) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "bad_users", + [ + {}, # dict — falsy but not a list + (), # tuple — falsy but not a list + "", # string — falsy but not a list + 0, # int — falsy but not a list + ], + ids=["dict", "tuple", "str", "int"], +) +def test_non_list_users_is_skipped(monkeypatch, bad_users): + """spec 8.5: only an explicit empty list means unattached; other falsy types must skip.""" + disk = _make_disk("bad-users-disk") + disk.users = bad_users + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", SimpleNamespace(disks=[disk]))] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Issue 3: zone-to-region parsing strictness (spec 8.2 / 7) +# --------------------------------------------------------------------------- + + +def test_zone_scope_without_zone_letter_is_skipped(monkeypatch): + """spec 8.2/7: a zonal scope like zones/us-central1 (no single-letter suffix) must skip. + + rsplit alone would silently derive 'us' as the region, which is a silent wrong guess. 
+ """ + disk = _make_disk("region-only-zone") + mock = MagicMock() + mock.aggregated_list.return_value = [ + ("zones/us-central1", SimpleNamespace(disks=[disk])) # missing zone letter + ] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_zone_scope_with_multi_char_suffix_is_skipped(monkeypatch): + """spec 8.2/7: a zone suffix longer than one character must skip (not a valid GCP zone).""" + disk = _make_disk("bad-suffix-disk") + mock = MagicMock() + mock.aggregated_list.return_value = [ + ("zones/us-central1-ab", SimpleNamespace(disks=[disk])) # two-char suffix + ] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Issue 4: malformed record hardening (spec 9.6) +# --------------------------------------------------------------------------- + + +def test_malformed_size_gb_uses_zero(monkeypatch): + """spec 9.6: non-numeric size_gb must not crash; the disk should still be emitted with size 0.""" + disk = _make_disk("bad-size-disk") + disk.size_gb = "not-a-number" + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", SimpleNamespace(disks=[disk]))] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["size_gb"] == 0 + + +def test_non_string_last_detach_timestamp_keeps_baseline_confidence(monkeypatch): + """spec 9.6 / 7: non-string last_detach_timestamp must not crash; 
baseline confidence kept.""" + disk = _make_disk("ts-type-disk") + disk.last_detach_timestamp = 12345 # integer, not a string + mock = MagicMock() + mock.aggregated_list.return_value = [("zones/us-central1-a", SimpleNamespace(disks=[disk]))] + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.disk_unattached.compute_v1.DisksClient", + lambda credentials: mock, + ) + findings = find_unattached_disks(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + # Non-string timestamp treated as absent → zonal baseline confidence = HIGH + assert findings[0].confidence == ConfidenceLevel.HIGH + assert "last_detach_timestamp" not in findings[0].details # --------------------------------------------------------------------------- diff --git a/tests/cleancloud/providers/gcp/test_gcp_ip_unused.py b/tests/cleancloud/providers/gcp/test_gcp_ip_unused.py index 6f90071..8a5b060 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_ip_unused.py +++ b/tests/cleancloud/providers/gcp/test_gcp_ip_unused.py @@ -1,5 +1,6 @@ """Unit tests for gcp.compute.ip.unused rule.""" +import warnings from types import SimpleNamespace from unittest.mock import MagicMock @@ -9,40 +10,66 @@ from cleancloud.core.confidence import ConfidenceLevel from cleancloud.providers.gcp.rules.ip_unused import find_unused_static_ips +_EXPECTED_COST = 7.30 # spec 9.7: $0.01/hr × 730h + def _make_address( name, status="RESERVED", address="34.1.2.3", address_type="EXTERNAL", + ip_version="IPV4", labels=None, network_tier="PREMIUM", purpose="", creation_timestamp="2024-01-01T00:00:00+00:00", + users=None, ): return SimpleNamespace( name=name, status=status, address=address, address_type=address_type, + ip_version=ip_version, labels=labels or {}, network_tier=network_tier, purpose=purpose, creation_timestamp=creation_timestamp, + users=users or [], ) -def _make_scoped_address_list(addresses): - return SimpleNamespace(addresses=addresses) +def _make_scoped_address_list(addresses, warning=None): + 
return SimpleNamespace(addresses=addresses, warning=warning) + + +def _make_page(scope_to_addrs, page_warning=None, unreachables=None): + """Build one page of aggregated_list results. + + scope_to_addrs: {scope_key: list_of_addresses} or {scope_key: ScopedList}. + page_warning: optional page-level warning SimpleNamespace. + unreachables: optional list of unreachable scope strings. + """ + items = {} + for scope, val in scope_to_addrs.items(): + if isinstance(val, list): + items[scope] = _make_scoped_address_list(val) + else: + items[scope] = val # pre-built ScopedList (carries its own warning) + return SimpleNamespace(items=items, warning=page_warning, unreachables=unreachables or []) + + +def _make_pager(pages): + """Wrap a list of pages in a mock pager with a .pages attribute.""" + pager = MagicMock() + pager.pages = pages + return pager def _mock_clients(region_address_map, monkeypatch, global_addresses=None): - """Patch both AddressesClient (regional) and GlobalAddressesClient.""" + """Patch AddressesClient (regional pager) and GlobalAddressesClient.""" regional_mock = MagicMock() - regional_mock.aggregated_list.return_value = [ - (scope, _make_scoped_address_list(addresses)) - for scope, addresses in region_address_map.items() - ] + regional_mock.aggregated_list.return_value = _make_pager([_make_page(region_address_map)]) monkeypatch.setattr( "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", lambda credentials: regional_mock, @@ -57,8 +84,13 @@ def _mock_clients(region_address_map, monkeypatch, global_addresses=None): return regional_mock, global_mock +# --------------------------------------------------------------------------- +# Basic detection +# --------------------------------------------------------------------------- + + def test_reserved_regional_ip_flagged(monkeypatch): - """A RESERVED regional IP produces a finding.""" + """A RESERVED regional IPv4 EXTERNAL IP produces a finding.""" _mock_clients( {"regions/us-central1": 
[_make_address("unused-ip", status="RESERVED")]}, monkeypatch, @@ -72,10 +104,22 @@ def test_reserved_regional_ip_flagged(monkeypatch): assert "unused-ip" in f.resource_id assert f.region == "us-central1" assert f.confidence == ConfidenceLevel.HIGH - assert f.estimated_monthly_cost_usd == 7.20 + assert f.estimated_monthly_cost_usd == _EXPECTED_COST assert f.details["scope"] == "regional" +def test_estimated_cost_is_7_30(monkeypatch): + """Estimated monthly cost must be exactly $7.30 (spec 9.7: $0.01/hr × 730h).""" + _mock_clients( + {"regions/us-central1": [_make_address("ip")]}, + monkeypatch, + global_addresses=[_make_address("g-ip")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + assert all(f.estimated_monthly_cost_usd == 7.30 for f in findings) + + def test_in_use_regional_ip_not_flagged(monkeypatch): """An IN_USE regional IP should not be flagged.""" _mock_clients( @@ -86,8 +130,18 @@ def test_in_use_regional_ip_not_flagged(monkeypatch): assert findings == [] +def test_reserving_status_not_flagged(monkeypatch): + """RESERVING status must not produce a finding (spec 9.2.3).""" + _mock_clients( + {"regions/us-central1": [_make_address("ip", status="RESERVING")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + def test_reserved_global_ip_flagged(monkeypatch): - """A RESERVED global IP produces a finding with region='global'.""" + """A RESERVED global IPv4 EXTERNAL IP produces a finding with region='global'.""" _mock_clients( {}, monkeypatch, @@ -100,7 +154,7 @@ def test_reserved_global_ip_flagged(monkeypatch): assert f.region == "global" assert f.details["scope"] == "global" assert "global-ip" in f.resource_id - assert f.estimated_monthly_cost_usd == 7.20 + assert f.estimated_monthly_cost_usd == _EXPECTED_COST def test_in_use_global_ip_not_flagged(monkeypatch): @@ -128,6 +182,347 @@ def test_regional_and_global_both_returned(monkeypatch): 
assert scopes == {"regional", "global"} +# --------------------------------------------------------------------------- +# Spec 8.6 / 9.3: addressType must be exactly "EXTERNAL" +# --------------------------------------------------------------------------- + + +def test_internal_ip_not_flagged(monkeypatch): + """INTERNAL addresses are out of scope (spec 8.6, 9.3).""" + _mock_clients( + { + "regions/us-central1": [ + _make_address("internal-ip", status="RESERVED", address_type="INTERNAL"), + _make_address("external-ip", status="RESERVED", address_type="EXTERNAL"), + ] + }, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["address_name"] == "external-ip" + + +def test_absent_address_type_skipped(monkeypatch): + """Absent / empty addressType must skip — unknown is not 'EXTERNAL' (spec 8.6).""" + _mock_clients( + {"regions/us-central1": [_make_address("ip", address_type="")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_global_absent_address_type_skipped(monkeypatch): + """Global address with absent addressType must skip (spec 8.6).""" + _mock_clients( + {}, + monkeypatch, + global_addresses=[_make_address("g-ip", address_type="")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Spec 8.7 / 9.3: ipVersion must be exactly "IPV4" +# --------------------------------------------------------------------------- + + +def test_ipv6_regional_ip_not_flagged(monkeypatch): + """IPv6 addresses are out of scope (spec 8.7, 9.3).""" + _mock_clients( + {"regions/us-central1": [_make_address("ipv6-ip", ip_version="IPV6")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings 
== [] + + +def test_absent_ip_version_skipped(monkeypatch): + """Absent / empty ipVersion must skip — unknown is not 'IPV4' (spec 8.7).""" + _mock_clients( + {"regions/us-central1": [_make_address("ip", ip_version="")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_ipv6_global_ip_not_flagged(monkeypatch): + """Global IPv6 addresses are out of scope (spec 8.7).""" + _mock_clients( + {}, + monkeypatch, + global_addresses=[_make_address("g-ipv6", ip_version="IPV6")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_ip_version_in_details(monkeypatch): + """ip_version must appear in finding details (spec 11.3).""" + _mock_clients( + {"regions/us-central1": [_make_address("ip")]}, + monkeypatch, + global_addresses=[_make_address("g-ip")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + assert all(f.details["ip_version"] == "IPV4" for f in findings) + + +# --------------------------------------------------------------------------- +# Spec 8.8 / 9.4: NAT_AUTO exclusion +# --------------------------------------------------------------------------- + + +def test_nat_auto_regional_ip_not_flagged(monkeypatch): + """purpose == NAT_AUTO must be excluded (spec 8.8, 9.4).""" + _mock_clients( + {"regions/us-central1": [_make_address("nat-ip", purpose="NAT_AUTO")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_nat_auto_global_ip_not_flagged(monkeypatch): + """Global purpose == NAT_AUTO must be excluded (spec 8.8).""" + _mock_clients( + {}, + monkeypatch, + global_addresses=[_make_address("nat-g-ip", purpose="NAT_AUTO")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def 
test_other_purpose_regional_ip_flagged(monkeypatch): + """Non-NAT_AUTO purpose should not block detection.""" + _mock_clients( + {"regions/us-central1": [_make_address("lb-ip", purpose="SHARED_LOADBALANCER_VIP")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + + +# --------------------------------------------------------------------------- +# Spec 8.9 / 9.5: users[] contradictory current-use evidence +# --------------------------------------------------------------------------- + + +def test_non_empty_users_regional_skipped(monkeypatch): + """Non-empty users[] is contradictory evidence — must skip (spec 8.9).""" + _mock_clients( + { + "regions/us-central1": [ + _make_address("in-use-ip", users=["projects/p/zones/z/instances/vm1"]), + ] + }, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_non_empty_users_global_skipped(monkeypatch): + """Global address with non-empty users[] must skip (spec 8.9).""" + _mock_clients( + {}, + monkeypatch, + global_addresses=[ + _make_address("g-in-use", users=["projects/p/global/forwardingRules/fr1"]) + ], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_empty_users_regional_flagged(monkeypatch): + """Empty users[] with RESERVED status should still emit a finding.""" + _mock_clients( + {"regions/us-central1": [_make_address("ip", users=[])]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + + +# --------------------------------------------------------------------------- +# Spec 8.1: malformed records +# --------------------------------------------------------------------------- + + +def test_absent_name_regional_skipped(monkeypatch): + """Address record with absent / empty name must skip (spec 8.1).""" + 
_mock_clients( + {"regions/us-central1": [_make_address("")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_absent_name_global_skipped(monkeypatch): + """Global address record with absent / empty name must skip (spec 8.1).""" + _mock_clients( + {}, + monkeypatch, + global_addresses=[_make_address("")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Spec 8.2: scope key validation +# --------------------------------------------------------------------------- + + +def test_malformed_scope_key_skipped(monkeypatch): + """Scope key that is not exactly 'regions/REGION' must be skipped (spec 8.2).""" + _mock_clients( + {"global": [_make_address("bad-ip")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_extra_segment_scope_key_skipped(monkeypatch): + """Scope key with >2 segments (e.g. 'regions/us-central1/extra') must be skipped (spec 8.2).""" + _mock_clients( + {"regions/us-central1/extra": [_make_address("ip")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_zones_scope_key_skipped(monkeypatch): + """Scope key of form 'zones/...' 
must be skipped — not a regional address scope.""" + _mock_clients( + {"zones/us-central1-a": [_make_address("ip")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Spec 9.1.2: returnPartialSuccess=True +# --------------------------------------------------------------------------- + + +def test_aggregated_list_called_with_return_partial_success(monkeypatch): + """aggregated_list must be called with return_partial_success=True (spec 9.1.2).""" + regional_mock, _ = _mock_clients({}, monkeypatch) + find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + call_kwargs = regional_mock.aggregated_list.call_args.kwargs + # return_partial_success is passed inside the request dict + request = call_kwargs.get("request") or {} + assert request.get("return_partial_success") is True + + +# --------------------------------------------------------------------------- +# Spec 9.1.6-7: partial coverage warnings (scope, page-level, unreachables) +# --------------------------------------------------------------------------- + + +def test_scope_level_warning_is_emitted(monkeypatch): + """Scope-level warning from aggregated_list must be surfaced as UserWarning (spec 9.1.6).""" + scope_warning = SimpleNamespace(code="NO_RESULTS_ON_PAGE", message="partial") + scoped_list = _make_scoped_address_list([], warning=scope_warning) + regional_mock = MagicMock() + regional_mock.aggregated_list.return_value = _make_pager( + [_make_page({"regions/us-central1": scoped_list})] + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", + lambda credentials: regional_mock, + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.ip_unused.compute_v1.GlobalAddressesClient", + lambda credentials: MagicMock(list=lambda project: []), + ) + with warnings.catch_warnings(record=True) as 
caught: + warnings.simplefilter("always") + find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + user_warnings = [w for w in caught if issubclass(w.category, UserWarning)] + assert len(user_warnings) == 1 + assert "regions/us-central1" in str(user_warnings[0].message) + assert "NO_RESULTS_ON_PAGE" in str(user_warnings[0].message) + + +def test_top_level_page_warning_is_emitted(monkeypatch): + """Page-level (top-level) warning must be surfaced as UserWarning (spec 9.1.6).""" + page_warning = SimpleNamespace(code="UNREACHABLE", message="some scopes unreachable") + regional_mock = MagicMock() + regional_mock.aggregated_list.return_value = _make_pager( + [_make_page({}, page_warning=page_warning)] + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", + lambda credentials: regional_mock, + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.ip_unused.compute_v1.GlobalAddressesClient", + lambda credentials: MagicMock(list=lambda project: []), + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + user_warnings = [w for w in caught if issubclass(w.category, UserWarning)] + assert len(user_warnings) == 1 + assert "top-level" in str(user_warnings[0].message) + assert "UNREACHABLE" in str(user_warnings[0].message) + + +def test_unreachable_scope_warning_is_emitted(monkeypatch): + """Each unreachable scope in the page must be surfaced as a UserWarning (spec 9.1.6-7).""" + regional_mock = MagicMock() + regional_mock.aggregated_list.return_value = _make_pager( + [_make_page({}, unreachables=["regions/us-east1", "regions/europe-west1"])] + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", + lambda credentials: regional_mock, + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.ip_unused.compute_v1.GlobalAddressesClient", + lambda 
credentials: MagicMock(list=lambda project: []), + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + user_warnings = [w for w in caught if issubclass(w.category, UserWarning)] + assert len(user_warnings) == 2 + warning_texts = [str(w.message) for w in user_warnings] + assert any("regions/us-east1" in t for t in warning_texts) + assert any("regions/europe-west1" in t for t in warning_texts) + + +def test_no_warning_emitted_on_clean_page(monkeypatch): + """A page with no warning or unreachables must not emit UserWarnings.""" + _mock_clients({"regions/us-central1": []}, monkeypatch) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + assert not any(issubclass(w.category, UserWarning) for w in caught) + + +# --------------------------------------------------------------------------- +# Region filter (spec 8.3, 8.4, 9.6) +# --------------------------------------------------------------------------- + + def test_region_filter_excludes_other_regions(monkeypatch): """region_filter restricts results to the matching region only.""" _mock_clients( @@ -146,7 +541,7 @@ def test_region_filter_excludes_other_regions(monkeypatch): def test_region_filter_skips_global_ips(monkeypatch): - """When region_filter is set, global IPs are not scanned.""" + """When region_filter is set, global IPs are not scanned (spec 8.4, 9.6).""" regional_mock, global_mock = _mock_clients( {"regions/us-central1": [_make_address("r-ip", status="RESERVED")]}, monkeypatch, @@ -165,12 +560,17 @@ def test_empty_region_skipped(monkeypatch): assert findings == [] +# --------------------------------------------------------------------------- +# Failure behavior (spec 9.8) +# --------------------------------------------------------------------------- + + def 
test_regional_permission_denied_raises(monkeypatch): - """PermissionDenied during regional iteration raises PermissionError.""" + """PermissionDenied during regional page iteration raises PermissionError (spec 9.8.1).""" regional_mock = MagicMock() - regional_mock.aggregated_list.return_value = iter( - _RaiseOnIter(PermissionDenied("compute.addresses.list denied")) - ) + pager = MagicMock() + pager.pages = _RaiseOnIter(PermissionDenied("compute.addresses.list denied")) + regional_mock.aggregated_list.return_value = pager monkeypatch.setattr( "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", lambda credentials: regional_mock, @@ -184,11 +584,11 @@ def test_regional_permission_denied_raises(monkeypatch): def test_regional_not_found_returns_empty(monkeypatch): - """NotFound (Compute API not enabled) during regional scan returns empty list.""" + """NotFound (Compute API not enabled) during regional scan returns empty list (spec 9.8.3).""" regional_mock = MagicMock() - regional_mock.aggregated_list.return_value = iter( - _RaiseOnIter(NotFound("Compute Engine API not enabled")) - ) + pager = MagicMock() + pager.pages = _RaiseOnIter(NotFound("Compute Engine API not enabled")) + regional_mock.aggregated_list.return_value = pager monkeypatch.setattr( "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", lambda credentials: regional_mock, @@ -201,17 +601,14 @@ def test_regional_not_found_returns_empty(monkeypatch): assert findings == [] -def test_global_permission_denied_returns_regional_only(monkeypatch): - """PermissionDenied on global IPs is silently swallowed; regional findings returned.""" +def test_global_permission_denied_raises(monkeypatch): + """PermissionDenied on global IPs must surface as PermissionError (spec 9.8.2).""" global_mock = MagicMock() global_mock.list.return_value = iter( _RaiseOnIter(PermissionDenied("compute.globalAddresses.list denied")) ) - regional_mock = MagicMock() - 
regional_mock.aggregated_list.return_value = [ - ("regions/us-central1", _make_scoped_address_list([_make_address("r-ip")])) - ] + regional_mock.aggregated_list.return_value = _make_pager([_make_page({})]) monkeypatch.setattr( "cleancloud.providers.gcp.rules.ip_unused.compute_v1.AddressesClient", lambda credentials: regional_mock, @@ -220,11 +617,13 @@ def test_global_permission_denied_returns_regional_only(monkeypatch): "cleancloud.providers.gcp.rules.ip_unused.compute_v1.GlobalAddressesClient", lambda credentials: global_mock, ) - findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + with pytest.raises(PermissionError, match="compute.globalAddresses.list"): + find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) - # Regional finding returned, global error silently swallowed - assert len(findings) == 1 - assert findings[0].details["scope"] == "regional" + +# --------------------------------------------------------------------------- +# Details, evidence, and signals +# --------------------------------------------------------------------------- def test_labels_in_details(monkeypatch): @@ -243,11 +642,6 @@ def test_labels_in_details(monkeypatch): assert findings[0].details["labels"] == {"team": "infra", "env": "prod"} -# --------------------------------------------------------------------------- -# is_regional, network_tier, and "estimated" wording -# --------------------------------------------------------------------------- - - def test_regional_ip_has_is_regional_true(monkeypatch): """Regional IP details should include is_regional=True.""" _mock_clients( @@ -306,6 +700,31 @@ def test_premium_tier_no_extra_cost_note(monkeypatch): assert not any("STANDARD tier IPs cost less" in s for s in not_checked) +def test_absent_regional_network_tier_stored_as_none(monkeypatch): + """Absent regional network_tier must not be guessed — stored as None (spec 7).""" + _mock_clients( + {"regions/us-central1": [_make_address("ip", 
network_tier="")]}, + monkeypatch, + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["network_tier"] is None + + +def test_absent_global_network_tier_stored_as_none(monkeypatch): + """Absent global network_tier must not be guessed — stored as None (spec 7).""" + _mock_clients( + {}, + monkeypatch, + global_addresses=[_make_address("g-ip", network_tier="")], + ) + findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["network_tier"] is None + + def test_purpose_in_details(monkeypatch): """purpose field should appear in details to aid triage.""" _mock_clients( @@ -341,33 +760,31 @@ def test_creation_timestamp_in_details(monkeypatch): assert all(f.details["creation_timestamp"] == ts for f in findings) -def test_internal_ip_not_flagged(monkeypatch): - """INTERNAL addresses are not subject to external IP reservation billing — skip them.""" +def test_summary_contains_estimated(monkeypatch): + """Summary should include 'estimated' for both regional and global IPs.""" _mock_clients( - { - "regions/us-central1": [ - _make_address("internal-ip", status="RESERVED", address_type="INTERNAL"), - _make_address("external-ip", status="RESERVED", address_type="EXTERNAL"), - ] - }, + {"regions/us-central1": [_make_address("r-ip")]}, monkeypatch, + global_addresses=[_make_address("g-ip")], ) findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) - assert len(findings) == 1 - assert findings[0].details["address_name"] == "external-ip" + assert all("estimated" in f.summary for f in findings) -def test_summary_contains_estimated(monkeypatch): - """Summary should include 'estimated' for both regional and global IPs.""" +def test_signals_used_include_ipv4_and_external(monkeypatch): + """signals_used must disclose address type and IP version (spec 11.2).""" _mock_clients( - 
{"regions/us-central1": [_make_address("r-ip")]}, + {"regions/us-central1": [_make_address("ip")]}, monkeypatch, global_addresses=[_make_address("g-ip")], ) findings = find_unused_static_ips(project_id="proj-1", credentials=MagicMock()) - assert all("estimated" in f.summary for f in findings) + for f in findings: + signals = " ".join(f.evidence.signals_used) + assert "EXTERNAL" in signals + assert "IPv4" in signals # --------------------------------------------------------------------------- diff --git a/tests/cleancloud/providers/gcp/test_gcp_snapshot_old.py b/tests/cleancloud/providers/gcp/test_gcp_snapshot_old.py index 6011629..cbcbc7d 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_snapshot_old.py +++ b/tests/cleancloud/providers/gcp/test_gcp_snapshot_old.py @@ -8,6 +8,7 @@ from google.api_core.exceptions import Forbidden, NotFound, PermissionDenied from cleancloud.core.confidence import ConfidenceLevel +from cleancloud.core.risk import RiskLevel from cleancloud.providers.gcp.rules.snapshot_old import find_old_snapshots @@ -23,23 +24,33 @@ def _make_snapshot( creation_timestamp=None, disk_size_gb=100, storage_bytes=0, + storage_bytes_status="", source_disk="zones/us-central1-a/disks/my-disk", + source_disk_id="", labels=None, storage_locations=None, - source_disk_id="", chain_name="", + snapshot_type="", + auto_created=False, + source_snapshot_schedule_policy="", + source_snapshot_schedule_policy_id="", ): return SimpleNamespace( name=name, status=status, - creation_timestamp=creation_timestamp or _ts(100), + creation_timestamp=_ts(100) if creation_timestamp is None else creation_timestamp, disk_size_gb=disk_size_gb, storage_bytes=storage_bytes, + storage_bytes_status=storage_bytes_status, source_disk=source_disk, + source_disk_id=source_disk_id, labels=labels or {}, storage_locations=storage_locations or [], - source_disk_id=source_disk_id, chain_name=chain_name, + snapshot_type=snapshot_type, + auto_created=auto_created, + 
source_snapshot_schedule_policy=source_snapshot_schedule_policy, + source_snapshot_schedule_policy_id=source_snapshot_schedule_policy_id, ) @@ -54,6 +65,11 @@ def _mock_client(snapshots, monkeypatch): return mock +# --------------------------------------------------------------------------- +# Basic detection +# --------------------------------------------------------------------------- + + def test_old_snapshot_is_flagged(monkeypatch): """A READY snapshot older than 90 days should produce a finding.""" _mock_client([_make_snapshot("old-snap", creation_timestamp=_ts(100))], monkeypatch) @@ -65,7 +81,7 @@ def test_old_snapshot_is_flagged(monkeypatch): assert f.provider == "gcp" assert "old-snap" in f.resource_id assert f.region == "global" - assert f.details["max_age_days"] >= 100 + assert f.details["age_days"] >= 100 def test_recent_snapshot_not_flagged(monkeypatch): @@ -76,7 +92,7 @@ def test_recent_snapshot_not_flagged(monkeypatch): def test_non_ready_snapshot_not_flagged(monkeypatch): - """Snapshots not in READY status are skipped.""" + """Snapshots not in READY status are skipped (spec 8.2).""" _mock_client( [_make_snapshot("creating-snap", status="CREATING", creation_timestamp=_ts(200))], monkeypatch, @@ -85,179 +101,348 @@ def test_non_ready_snapshot_not_flagged(monkeypatch): assert findings == [] -def test_source_disk_deleted_high_confidence(monkeypatch): - """When source_disk is empty, confidence should be HIGH (orphaned snapshot).""" +def test_failed_snapshot_not_flagged(monkeypatch): + """FAILED snapshots are skipped (spec 8.2).""" _mock_client( - [_make_snapshot("orphan-snap", source_disk="", creation_timestamp=_ts(100))], + [_make_snapshot("failed-snap", status="FAILED", creation_timestamp=_ts(200))], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] - assert len(findings) == 1 - assert findings[0].confidence == ConfidenceLevel.HIGH - assert findings[0].details["source_disk_deleted"] is 
True + +def test_absent_name_skipped(monkeypatch): + """Malformed snapshot with empty name is skipped (spec 8.1).""" + _mock_client([_make_snapshot("")], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] -def test_source_disk_present_medium_confidence(monkeypatch): - """When source disk still exists, confidence should be MEDIUM.""" +def test_unparsable_timestamp_skipped(monkeypatch): + """Snapshot with unparsable creation_timestamp is skipped (spec 8.3).""" _mock_client( - [ - _make_snapshot( - "active-source-snap", - source_disk="zones/us-central1-a/disks/live-disk", - creation_timestamp=_ts(100), - ) - ], + [_make_snapshot("bad-ts-snap", creation_timestamp="not-a-timestamp")], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] - assert len(findings) == 1 - assert findings[0].confidence == ConfidenceLevel.MEDIUM - assert findings[0].details["source_disk_deleted"] is False + +def test_missing_timestamp_skipped(monkeypatch): + """Snapshot with empty creation_timestamp is skipped (spec 8.3).""" + _mock_client( + [_make_snapshot("no-ts-snap", creation_timestamp="")], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] -def test_cost_from_storage_bytes(monkeypatch): - """When storage_bytes is available, it's used instead of disk_size_gb.""" - # 50 GB of actual storage bytes - storage_bytes = 50 * (1024**3) +# --------------------------------------------------------------------------- +# spec 8.5: archive snapshot exclusion +# --------------------------------------------------------------------------- + + +def test_archive_snapshot_skipped(monkeypatch): + """Archive snapshots are excluded regardless of age (spec 8.5).""" _mock_client( - [ - _make_snapshot( - "compressed-snap", - disk_size_gb=200, - storage_bytes=storage_bytes, - creation_timestamp=_ts(100), - ) - ], + 
[_make_snapshot("archive-snap", snapshot_type="ARCHIVE", creation_timestamp=_ts(200))], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + +def test_standard_snapshot_type_not_skipped(monkeypatch): + """STANDARD snapshot type is not excluded (spec 8.5 only excludes ARCHIVE).""" + _mock_client( + [_make_snapshot("std-snap", snapshot_type="STANDARD", creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) assert len(findings) == 1 - expected_cost = round(50 * 0.026, 2) - assert findings[0].estimated_monthly_cost_usd == expected_cost -def test_cost_fallback_to_disk_size_gb(monkeypatch): - """When storage_bytes is 0, disk_size_gb is used for cost estimate.""" +# --------------------------------------------------------------------------- +# spec 8.6 / 8.7: schedule-created and auto-created exclusions +# --------------------------------------------------------------------------- + + +def test_auto_created_snapshot_skipped(monkeypatch): + """auto_created == True snapshots are skipped (spec 8.7).""" + _mock_client( + [_make_snapshot("auto-snap", auto_created=True, creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_schedule_policy_skipped(monkeypatch): + """Non-empty sourceSnapshotSchedulePolicy skips the snapshot (spec 8.6).""" _mock_client( [ _make_snapshot( - "fallback-snap", - disk_size_gb=100, - storage_bytes=0, + "sched-snap", + source_snapshot_schedule_policy="projects/p/regions/us/resourcePolicies/daily", creation_timestamp=_ts(100), ) ], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) - - assert len(findings) == 1 - expected_cost = round(100 * 0.026, 2) - assert findings[0].estimated_monthly_cost_usd == expected_cost + assert findings == [] -def 
test_zero_size_snapshot_no_cost(monkeypatch): - """A snapshot with zero disk_size_gb and zero storage_bytes has None cost.""" +def test_schedule_policy_id_skipped(monkeypatch): + """Non-empty sourceSnapshotSchedulePolicyId skips the snapshot (spec 8.6).""" _mock_client( [ _make_snapshot( - "empty-snap", - disk_size_gb=0, - storage_bytes=0, + "sched-id-snap", + source_snapshot_schedule_policy_id="1234567890", creation_timestamp=_ts(100), ) ], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] - assert len(findings) == 1 + +# --------------------------------------------------------------------------- +# spec 9.7 / 9.8: cost model and confidence contracts +# --------------------------------------------------------------------------- + + +def test_estimated_monthly_cost_is_none(monkeypatch): + """estimated_monthly_cost_usd must always be None (spec 9.7).""" + _mock_client([_make_snapshot("old-snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) assert findings[0].estimated_monthly_cost_usd is None -def test_custom_days_old_threshold(monkeypatch): - """max_age_days parameter controls the age threshold.""" +def test_estimated_monthly_cost_is_none_with_storage_bytes(monkeypatch): + """Even when storage_bytes is available, cost is still None (spec 9.7).""" + storage_bytes = 50 * (1024**3) _mock_client( - [_make_snapshot("mid-snap", creation_timestamp=_ts(50))], + [_make_snapshot("big-snap", disk_size_gb=200, storage_bytes=storage_bytes)], monkeypatch, ) - # With threshold=30, a 50-day-old snapshot should be flagged - findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock(), max_age_days=30) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) assert len(findings) == 1 + assert findings[0].estimated_monthly_cost_usd is None - # With threshold=90 (default), same snapshot should not be flagged - 
findings2 = find_old_snapshots(project_id="proj-1", credentials=MagicMock(), max_age_days=90) - assert findings2 == [] + +def test_confidence_is_low(monkeypatch): + """Confidence must be LOW for all findings (spec 9.8).""" + _mock_client([_make_snapshot("old-snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].confidence == ConfidenceLevel.LOW -def test_region_filter_has_no_effect(monkeypatch): - """Snapshots are global resources — region_filter is ignored.""" +def test_confidence_is_low_when_source_disk_absent(monkeypatch): + """Confidence is still LOW even when source_disk is empty — no inference (spec 9.6 / 9.8).""" _mock_client( - [_make_snapshot("global-snap", creation_timestamp=_ts(100))], + [_make_snapshot("orphan-snap", source_disk="", creation_timestamp=_ts(100))], monkeypatch, ) - # Even with a region_filter, the snapshot is still returned (region_filter doesn't apply) - findings = find_old_snapshots( - project_id="proj-1", credentials=MagicMock(), region_filter="us-east1" + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + assert findings[0].confidence == ConfidenceLevel.LOW + + +def test_risk_is_low(monkeypatch): + """Risk must be LOW for all findings (spec 9.9).""" + _mock_client([_make_snapshot("old-snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].risk == RiskLevel.LOW + + +# --------------------------------------------------------------------------- +# spec 9.6: source_disk must not infer "deleted" state +# --------------------------------------------------------------------------- + + +def test_no_source_disk_deleted_field_in_details(monkeypatch): + """details must not contain 'source_disk_deleted' (spec 9.6).""" + _mock_client( + [_make_snapshot("orphan-snap", source_disk="", 
creation_timestamp=_ts(100))], + monkeypatch, ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) assert len(findings) == 1 + assert "source_disk_deleted" not in findings[0].details -def test_labels_in_details(monkeypatch): - """Labels on the snapshot appear in finding details.""" +# --------------------------------------------------------------------------- +# spec 10.3: required details fields +# --------------------------------------------------------------------------- + + +def test_age_days_in_details(monkeypatch): + """age_days should appear in details (spec 10.3).""" + _mock_client([_make_snapshot("old-snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["age_days"] >= 100 + + +def test_max_age_days_threshold_in_details(monkeypatch): + """max_age_days_threshold should appear in details as the configured threshold (spec 10.3).""" + _mock_client([_make_snapshot("old-snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock(), max_age_days=90) + assert findings[0].details["max_age_days_threshold"] == 90 + + +def test_storage_bytes_status_in_details(monkeypatch): + """storage_bytes_status should appear in details (spec 10.3).""" _mock_client( - [ - _make_snapshot( - "labeled-snap", - creation_timestamp=_ts(100), - labels={"backup-policy": "manual", "team": "data"}, - ) - ], + [_make_snapshot("snap", storage_bytes_status="UPDATING", creation_timestamp=_ts(100))], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["storage_bytes_status"] == "UPDATING" - assert len(findings) == 1 - assert findings[0].details["labels"] == {"backup-policy": "manual", "team": "data"} +def test_storage_bytes_status_absent_stored_as_none(monkeypatch): + """When storage_bytes_status is absent/empty, details stores None.""" 
+ _mock_client( + [_make_snapshot("snap", storage_bytes_status="", creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["storage_bytes_status"] is None -def test_permission_denied_raises_permission_error(monkeypatch): - """PermissionDenied during list iteration raises PermissionError.""" - mock = MagicMock() - mock.list.return_value = iter(_RaiseOnIter(PermissionDenied("compute.snapshots.list denied"))) - monkeypatch.setattr( - "cleancloud.providers.gcp.rules.snapshot_old.compute_v1.SnapshotsClient", - lambda credentials: mock, + +def test_snapshot_type_in_details(monkeypatch): + """snapshot_type should appear in details (spec 10.3).""" + _mock_client( + [_make_snapshot("snap", snapshot_type="STANDARD", creation_timestamp=_ts(100))], + monkeypatch, ) - with pytest.raises(PermissionError, match="compute.snapshots.list"): - find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["snapshot_type"] == "STANDARD" -def test_forbidden_raises_permission_error(monkeypatch): - """Forbidden (HTTP 403) raises PermissionError.""" - mock = MagicMock() - mock.list.return_value = iter(_RaiseOnIter(Forbidden("403 Forbidden"))) - monkeypatch.setattr( - "cleancloud.providers.gcp.rules.snapshot_old.compute_v1.SnapshotsClient", - lambda credentials: mock, +def test_snapshot_type_absent_stored_as_none(monkeypatch): + """When snapshot_type is absent/empty, details stores None.""" + _mock_client( + [_make_snapshot("snap", snapshot_type="", creation_timestamp=_ts(100))], + monkeypatch, ) - with pytest.raises(PermissionError): - find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["snapshot_type"] is None -def test_not_found_returns_empty(monkeypatch): - 
"""NotFound (Compute API not enabled) returns empty list.""" - mock = MagicMock() - mock.list.return_value = iter(_RaiseOnIter(NotFound("Compute Engine API not enabled"))) - monkeypatch.setattr( - "cleancloud.providers.gcp.rules.snapshot_old.compute_v1.SnapshotsClient", - lambda credentials: mock, +def test_auto_created_in_details(monkeypatch): + """auto_created field should appear in details for eligible snapshots (spec 10.3).""" + _mock_client( + [_make_snapshot("snap", auto_created=False, creation_timestamp=_ts(100))], + monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) - assert findings == [] + assert "auto_created" in findings[0].details + + +def test_source_disk_in_details_when_present(monkeypatch): + """source_disk should appear in details when non-empty (spec 10.3).""" + full_path = "zones/us-central1-a/disks/my-disk" + _mock_client( + [_make_snapshot("snap", source_disk=full_path, creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["source_disk"] == full_path + + +def test_source_disk_absent_from_details_when_empty(monkeypatch): + """source_disk should not appear in details when empty (spec 10.3 / 9.6).""" + _mock_client( + [_make_snapshot("snap", source_disk="", creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert "source_disk" not in findings[0].details + + +def test_no_source_disk_url_field(monkeypatch): + """details must not contain a 'source_disk_url' field — spec uses 'source_disk' only.""" + full_path = "projects/my-proj/zones/us-central1-a/disks/my-disk" + _mock_client( + [_make_snapshot("snap", source_disk=full_path, creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert "source_disk_url" not in findings[0].details + + +# 
--------------------------------------------------------------------------- +# spec 10.2: signals_used must disclose storage context +# --------------------------------------------------------------------------- + + +def test_storage_bytes_in_signals_as_context_only(monkeypatch): + """When storage_bytes > 0, signals_used should note it as context only (spec 10.2).""" + storage_bytes = 20 * (1024**3) + _mock_client( + [_make_snapshot("snap", storage_bytes=storage_bytes, creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert any("context only" in s for s in findings[0].evidence.signals_used) + + +def test_status_ready_in_signals(monkeypatch): + """signals_used must disclose status: READY (spec 10.2).""" + _mock_client([_make_snapshot("snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert any("READY" in s for s in findings[0].evidence.signals_used) + + +def test_age_and_threshold_in_signals(monkeypatch): + """signals_used must disclose age and threshold (spec 10.2).""" + _mock_client([_make_snapshot("snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock(), max_age_days=90) + combined = " ".join(findings[0].evidence.signals_used) + assert "90" in combined # threshold + assert "days" in combined + + +# --------------------------------------------------------------------------- +# Threshold / region_filter +# --------------------------------------------------------------------------- + + +def test_custom_days_old_threshold(monkeypatch): + """max_age_days parameter controls the age threshold.""" + _mock_client( + [_make_snapshot("mid-snap", creation_timestamp=_ts(50))], + monkeypatch, + ) + # With threshold=30, a 50-day-old snapshot should be flagged + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock(), 
max_age_days=30) + assert len(findings) == 1 + + # With threshold=90 (default), same snapshot should not be flagged + findings2 = find_old_snapshots(project_id="proj-1", credentials=MagicMock(), max_age_days=90) + assert findings2 == [] + + +def test_region_filter_has_no_effect(monkeypatch): + """Snapshots are global resources — region_filter is ignored (spec 9.1.3).""" + _mock_client( + [_make_snapshot("global-snap", creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots( + project_id="proj-1", credentials=MagicMock(), region_filter="us-east1" + ) + assert len(findings) == 1 + + +# --------------------------------------------------------------------------- +# Multiple snapshots +# --------------------------------------------------------------------------- def test_multiple_snapshots_mixed_age(monkeypatch): @@ -277,13 +462,51 @@ def test_multiple_snapshots_mixed_age(monkeypatch): assert names == {"old-1", "old-2"} +def test_exclusion_rules_reduce_findings(monkeypatch): + """Archive, auto-created, and schedule-created snapshots do not appear in findings.""" + _mock_client( + [ + _make_snapshot("eligible-snap", creation_timestamp=_ts(100)), + _make_snapshot("archive-snap", snapshot_type="ARCHIVE", creation_timestamp=_ts(100)), + _make_snapshot("auto-snap", auto_created=True, creation_timestamp=_ts(100)), + _make_snapshot( + "sched-snap", + source_snapshot_schedule_policy="projects/p/regions/r/resourcePolicies/pol", + creation_timestamp=_ts(100), + ), + ], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + assert findings[0].details["snapshot_name"] == "eligible-snap" + + # --------------------------------------------------------------------------- -# storage_locations, source_disk_id, chain_name +# Labels, storage_locations, source_disk_id, chain_name # --------------------------------------------------------------------------- +def 
test_labels_in_details(monkeypatch): + """Labels on the snapshot appear in finding details.""" + _mock_client( + [ + _make_snapshot( + "labeled-snap", + creation_timestamp=_ts(100), + labels={"backup-policy": "manual", "team": "data"}, + ) + ], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["labels"] == {"backup-policy": "manual", "team": "data"} + + def test_storage_locations_in_details(monkeypatch): - """storage_locations should appear in details to show regional vs multi-regional.""" + """storage_locations should appear in details (spec 10.3).""" _mock_client( [_make_snapshot("snap-regional", storage_locations=["us-central1"])], monkeypatch, @@ -305,7 +528,7 @@ def test_empty_storage_locations_stored_as_empty_list(monkeypatch): def test_source_disk_id_in_details_when_present(monkeypatch): - """source_disk_id should appear in details when non-empty.""" + """source_disk_id should appear in details when non-empty (spec 10.3).""" _mock_client( [_make_snapshot("snap-with-id", source_disk_id="1234567890")], monkeypatch, @@ -316,7 +539,7 @@ def test_source_disk_id_in_details_when_present(monkeypatch): def test_source_disk_id_absent_when_empty(monkeypatch): - """source_disk_id should not appear in details when empty.""" + """source_disk_id should not appear in details when empty (spec 10.3).""" _mock_client( [_make_snapshot("snap-no-id", source_disk_id="")], monkeypatch, @@ -327,7 +550,7 @@ def test_source_disk_id_absent_when_empty(monkeypatch): def test_chain_name_in_details_when_present(monkeypatch): - """chain_name should appear in details when set.""" + """chain_name should appear in details when set (spec 10.3).""" _mock_client( [_make_snapshot("snap-chained", chain_name="weekly-backup-chain")], monkeypatch, @@ -338,7 +561,7 @@ def test_chain_name_in_details_when_present(monkeypatch): def test_chain_name_absent_when_empty(monkeypatch): - """chain_name should 
not appear in details when not set.""" + """chain_name should not appear in details when not set (spec 10.3).""" _mock_client( [_make_snapshot("snap-no-chain", chain_name="")], monkeypatch, @@ -348,17 +571,203 @@ def test_chain_name_absent_when_empty(monkeypatch): assert "chain_name" not in findings[0].details -def test_source_disk_url_in_details_when_disk_present(monkeypatch): - """Full source_disk URL should be stored in details alongside the short name.""" - full_url = "projects/my-proj/zones/us-central1-a/disks/my-disk" +def test_chain_name_in_signals_when_present(monkeypatch): + """chain_name should appear in signals_used (spec 10.2).""" + _mock_client( + [_make_snapshot("snap-chained", chain_name="weekly-backup-chain")], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + + assert any("weekly-backup-chain" in s for s in findings[0].evidence.signals_used) + + +# --------------------------------------------------------------------------- +# Finding shape +# --------------------------------------------------------------------------- + + +def test_resource_id_format(monkeypatch): + """resource_id should follow the canonical project/global/snapshots/{name} path.""" + _mock_client([_make_snapshot("my-snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="my-proj", credentials=MagicMock()) + assert findings[0].resource_id == "projects/my-proj/global/snapshots/my-snap" + + +def test_resource_type(monkeypatch): + """resource_type should be 'gcp.compute.snapshot'.""" + _mock_client([_make_snapshot("snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].resource_type == "gcp.compute.snapshot" + + +def test_region_is_global(monkeypatch): + """region should always be 'global' for snapshot findings (spec 10.1).""" + _mock_client([_make_snapshot("snap", creation_timestamp=_ts(100))], 
monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].region == "global" + + +# --------------------------------------------------------------------------- +# Failure behavior (spec 9.10) +# --------------------------------------------------------------------------- + + +def test_permission_denied_raises_permission_error(monkeypatch): + """PermissionDenied during list iteration raises PermissionError (spec 9.10).""" + mock = MagicMock() + mock.list.return_value = iter(_RaiseOnIter(PermissionDenied("compute.snapshots.list denied"))) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.snapshot_old.compute_v1.SnapshotsClient", + lambda credentials: mock, + ) + with pytest.raises(PermissionError, match="compute.snapshots.list"): + find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + + +def test_forbidden_raises_permission_error(monkeypatch): + """Forbidden (HTTP 403) raises PermissionError (spec 9.10).""" + mock = MagicMock() + mock.list.return_value = iter(_RaiseOnIter(Forbidden("403 Forbidden"))) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.snapshot_old.compute_v1.SnapshotsClient", + lambda credentials: mock, + ) + with pytest.raises(PermissionError): + find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + + +def test_not_found_returns_empty(monkeypatch): + """NotFound (Compute API not enabled) returns empty list (spec 9.10).""" + mock = MagicMock() + mock.list.return_value = iter(_RaiseOnIter(NotFound("Compute Engine API not enabled"))) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.snapshot_old.compute_v1.SnapshotsClient", + lambda credentials: mock, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Polish: spec-wording, defensive normalization, input-shape hardening +# 
--------------------------------------------------------------------------- + + +def test_chain_signal_exact_phrasing(monkeypatch): + """Chain signal must use exact spec phrasing (spec 10.2).""" _mock_client( - [_make_snapshot("snap-url", source_disk=full_url)], + [_make_snapshot("snap", chain_name="my-chain", creation_timestamp=_ts(100))], monkeypatch, ) findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert any( + s.startswith("Snapshot is part of a named incremental chain:") and "my-chain" in s + for s in findings[0].evidence.signals_used + ) + + +def test_chain_name_camelcase_fallback(monkeypatch): + """chain_name should fall back to chainName attribute when snake_case is absent.""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + del snap.chain_name # remove snake_case attribute to force fallback + snap.chainName = "camel-chain" + _mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["chain_name"] == "camel-chain" + assert any("camel-chain" in s for s in findings[0].evidence.signals_used) - assert findings[0].details["source_disk_url"] == full_url - assert findings[0].details["source_disk"] == "my-disk" # short name still present + +def test_missing_auto_created_attribute_does_not_skip(monkeypatch): + """A snapshot object with no auto_created attribute must not be skipped or crash.""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + del snap.auto_created # simulate object without the attribute + _mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + + +def test_none_schedule_policy_does_not_skip(monkeypatch): + """source_snapshot_schedule_policy == None must not trigger skip (not in (None,'') is False).""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + snap.source_snapshot_schedule_policy = None + 
_mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + + +# --------------------------------------------------------------------------- +# Fix 1: zero storage_bytes surfaced in signals (spec 9.7) +# --------------------------------------------------------------------------- + + +def test_zero_storage_bytes_surfaced_in_signals(monkeypatch): + """storage_bytes == 0 must still appear as billed-storage context in signals (spec 9.7).""" + _mock_client( + [_make_snapshot("snap", storage_bytes=0, creation_timestamp=_ts(100))], + monkeypatch, + ) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + assert any( + "Billed storage (storageBytes)" in s and "0.0 GB" in s + for s in findings[0].evidence.signals_used + ) + + +def test_storage_bytes_context_signal_always_present(monkeypatch): + """storageBytes context signal is always emitted, regardless of value (spec 9.7).""" + _mock_client([_make_snapshot("snap", creation_timestamp=_ts(100))], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert any("Billed storage (storageBytes)" in s for s in findings[0].evidence.signals_used) + + +# --------------------------------------------------------------------------- +# Fix 2: non-negative normalization for numeric context fields (spec 7) +# --------------------------------------------------------------------------- + + +def test_negative_disk_size_gb_normalized_to_zero(monkeypatch): + """Negative disk_size_gb values must normalize to 0, not be preserved (spec 7).""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + snap.disk_size_gb = -50 + _mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["disk_size_gb"] == 0 + + +def test_negative_storage_bytes_normalized_to_zero(monkeypatch): + """Negative 
storage_bytes values must normalize to 0, not be preserved (spec 7).""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + snap.storage_bytes = -1024 + _mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["storage_bytes"] == 0 + + +# --------------------------------------------------------------------------- +# Fix 3: malformed context fields fall back gracefully (spec 9.10) +# --------------------------------------------------------------------------- + + +def test_malformed_labels_falls_back_to_empty_dict(monkeypatch): + """If snapshot.labels is not dict-convertible, labels falls back to {} (spec 9.10).""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + snap.labels = 42 # int — dict(42) raises TypeError + _mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + assert findings[0].details["labels"] == {} + + +def test_malformed_storage_locations_falls_back_to_empty_list(monkeypatch): + """If snapshot.storage_locations is not list-convertible, it falls back to [] (spec 9.10).""" + snap = _make_snapshot("snap", creation_timestamp=_ts(100)) + snap.storage_locations = 42 # int — list(42) raises TypeError + _mock_client([snap], monkeypatch) + findings = find_old_snapshots(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + assert findings[0].details["storage_locations"] == [] # --------------------------------------------------------------------------- diff --git a/tests/cleancloud/providers/gcp/test_gcp_sql_instance_idle.py b/tests/cleancloud/providers/gcp/test_gcp_sql_instance_idle.py index 5b092d8..bf49d9b 100644 --- a/tests/cleancloud/providers/gcp/test_gcp_sql_instance_idle.py +++ b/tests/cleancloud/providers/gcp/test_gcp_sql_instance_idle.py @@ -6,9 +6,15 @@ import pytest from cleancloud.core.confidence import ConfidenceLevel +from 
cleancloud.core.risk import RiskLevel from cleancloud.providers.gcp.rules.sql_instance_idle import find_idle_sql_instances +# Default create_time: 30 days ago — always old enough for the full 14-day window +def _old_create_time() -> str: + return (datetime.now(timezone.utc) - timedelta(days=30)).strftime("%Y-%m-%dT%H:%M:%SZ") + + def _make_instance( name, state="RUNNABLE", @@ -18,11 +24,13 @@ def _make_instance( tier="db-n1-standard-2", labels=None, create_time=None, + master_instance_name=None, availability_type="ZONAL", data_disk_size_gb=None, data_disk_type="PD_SSD", backup_retained_count=None, ): + """Build a minimal Cloud SQL instance dict as returned by the Admin API.""" settings = { "tier": tier, "userLabels": labels or {}, @@ -35,28 +43,41 @@ def _make_instance( settings["backupConfiguration"] = { "backupRetentionSettings": {"retainedBackups": backup_retained_count} } - instance = { + instance: dict = { "name": name, "state": state, "instanceType": instance_type, "region": region, "databaseVersion": database_version, "settings": settings, + # Always provide a createTime that is old enough unless the test overrides it + "createTime": create_time if create_time is not None else _old_create_time(), } - if create_time is not None: - instance["createTime"] = create_time + if master_instance_name: + instance["masterInstanceName"] = master_instance_name return instance -def _patch_sql_and_monitoring(monkeypatch, instances, has_connections=False): - """Patch both _list_sql_instances and _has_connections helpers.""" +def _patch_sql_and_monitoring( + monkeypatch, + instances, + active_connections_max=0.0, +): + """ + Patch _list_sql_instances and _query_active_connections. 
+ + active_connections_max: + 0.0 → confirmed idle + >0.0 → active (instance will be skipped) + None → unresolved coverage (instance will be skipped) + """ monkeypatch.setattr( "cleancloud.providers.gcp.rules.sql_instance_idle._list_sql_instances", lambda project_id, credentials: instances, ) monkeypatch.setattr( - "cleancloud.providers.gcp.rules.sql_instance_idle._has_connections", - lambda client, project_id, instance_name, **kwargs: has_connections, + "cleancloud.providers.gcp.rules.sql_instance_idle._query_active_connections", + lambda client, project_id, instance_name, instance_region, window_start, window_end: active_connections_max, ) monkeypatch.setattr( "cleancloud.providers.gcp.rules.sql_instance_idle.monitoring_v3.MetricServiceClient", @@ -64,12 +85,17 @@ def _patch_sql_and_monitoring(monkeypatch, instances, has_connections=False): ) +# --------------------------------------------------------------------------- +# Basic detection +# --------------------------------------------------------------------------- + + def test_idle_instance_flagged(monkeypatch): - """A RUNNABLE instance with zero connections over 14 days is flagged.""" + """A RUNNABLE primary instance with zero connections over 14 days is flagged.""" _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("idle-db")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) @@ -80,6 +106,7 @@ def test_idle_instance_flagged(monkeypatch): assert "idle-db" in f.resource_id assert f.region == "us-central1" assert f.confidence == ConfidenceLevel.HIGH + assert f.risk == RiskLevel.HIGH def test_active_instance_not_flagged(monkeypatch): @@ -87,7 +114,7 @@ def test_active_instance_not_flagged(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("active-db")], - has_connections=True, + active_connections_max=3.0, ) findings = find_idle_sql_instances(project_id="proj-1", 
credentials=MagicMock()) assert findings == [] @@ -101,7 +128,7 @@ def test_non_runnable_instance_skipped(monkeypatch): _make_instance("suspended-db", state="SUSPENDED"), _make_instance("maintenance-db", state="MAINTENANCE"), ], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) assert findings == [] @@ -112,36 +139,158 @@ def test_read_replica_skipped(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("replica-db", instance_type="READ_REPLICA_INSTANCE")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) assert findings == [] -def test_cost_from_known_tier(monkeypatch): - """Monthly cost is populated for a known tier.""" +def test_on_premises_instance_skipped(monkeypatch): + """ON_PREMISES_INSTANCE type is skipped.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("expensive-db", tier="db-n1-standard-2")], - has_connections=False, + instances=[_make_instance("onprem-db", instance_type="ON_PREMISES_INSTANCE")], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] - assert len(findings) == 1 - assert findings[0].estimated_monthly_cost_usd == 93.10 + +# --------------------------------------------------------------------------- +# Replica exclusion: masterInstanceName (spec 8.6 / 9.4) +# --------------------------------------------------------------------------- -def test_cost_none_for_unknown_tier(monkeypatch): - """Unknown tiers result in None cost estimate.""" +def test_master_instance_name_skips(monkeypatch): + """Instance with masterInstanceName set is excluded even if instanceType is primary.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("custom-db", tier="db-custom-16-65536")], - has_connections=False, + instances=[ + 
_make_instance( + "pseudo-primary", + instance_type="CLOUD_SQL_INSTANCE", + master_instance_name="real-primary", + ) + ], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Age / full-window coverage (spec 8.7, 8.8, 9.5) +# --------------------------------------------------------------------------- + + +def test_missing_createtime_skips(monkeypatch): + """Instance with no createTime field is skipped.""" + instance = _make_instance("no-time-db") + del instance["createTime"] + _patch_sql_and_monitoring(monkeypatch, instances=[instance], active_connections_max=0.0) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_empty_createtime_skips(monkeypatch): + """Instance with empty createTime string is skipped.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("empty-time-db", create_time="")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_unparsable_createtime_skips(monkeypatch): + """Instance with unparsable createTime is skipped.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("bad-time-db", create_time="not-a-date")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def test_instance_too_young_for_full_window_skips(monkeypatch): + """Instance created within the idle window (e.g. 
7 days ago for a 14-day window) is skipped.""" + recent = (datetime.now(timezone.utc) - timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%SZ") + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("young-db", create_time=recent)], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock(), idle_days=14) + assert findings == [] + + +def test_instance_just_within_24h_skipped(monkeypatch): + """Instance created 2 hours ago is skipped (much newer than window_start).""" + recent = (datetime.now(timezone.utc) - timedelta(hours=2)).strftime("%Y-%m-%dT%H:%M:%SZ") + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("brand-new-db", create_time=recent)], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + +def test_instance_older_than_idle_window_evaluated(monkeypatch): + """An instance created well before the idle window start is evaluated.""" + old = (datetime.now(timezone.utc) - timedelta(days=30)).strftime("%Y-%m-%dT%H:%M:%SZ") + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("old-db", create_time=old)], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) assert len(findings) == 1 - assert findings[0].estimated_monthly_cost_usd is None + + +# --------------------------------------------------------------------------- +# Metric coverage (spec 8.9, 9.6–9.7) +# --------------------------------------------------------------------------- + + +def test_unresolved_metric_skips(monkeypatch): + """Instance where _query_active_connections returns None is skipped.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("no-metric-db")], + active_connections_max=None, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings == [] + + +def 
test_monitoring_permission_denied_raises(monkeypatch): + """PermissionError from _query_active_connections propagates to caller.""" + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.sql_instance_idle._list_sql_instances", + lambda project_id, credentials: [_make_instance("db-1")], + ) + + def _raise_perm(*args, **kwargs): + raise PermissionError("monitoring.timeSeries.list permission required") + + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.sql_instance_idle._query_active_connections", + _raise_perm, + ) + monkeypatch.setattr( + "cleancloud.providers.gcp.rules.sql_instance_idle.monitoring_v3.MetricServiceClient", + lambda credentials: MagicMock(), + ) + with pytest.raises(PermissionError, match="monitoring.timeSeries.list"): + find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + + +# --------------------------------------------------------------------------- +# Region filter (spec 8.3) +# --------------------------------------------------------------------------- def test_region_filter(monkeypatch): @@ -152,7 +301,7 @@ def test_region_filter(monkeypatch): _make_instance("central-db", region="us-central1"), _make_instance("east-db", region="us-east1"), ], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances( project_id="proj-1", credentials=MagicMock(), region_filter="us-east1" @@ -162,13 +311,23 @@ def test_region_filter(monkeypatch): assert findings[0].details["region"] == "us-east1" +# --------------------------------------------------------------------------- +# Empty results +# --------------------------------------------------------------------------- + + def test_empty_instance_list_returns_empty(monkeypatch): """No Cloud SQL instances -> no findings.""" - _patch_sql_and_monitoring(monkeypatch, instances=[], has_connections=False) + _patch_sql_and_monitoring(monkeypatch, instances=[], active_connections_max=0.0) findings = find_idle_sql_instances(project_id="proj-1", 
credentials=MagicMock()) assert findings == [] +# --------------------------------------------------------------------------- +# Failure behavior (spec 9.13) +# --------------------------------------------------------------------------- + + def test_permission_error_propagates(monkeypatch): """PermissionError from _list_sql_instances propagates to caller.""" monkeypatch.setattr( @@ -195,17 +354,65 @@ def test_monitoring_client_error_returns_empty(monkeypatch): assert findings == [] -def test_labels_in_details(monkeypatch): - """userLabels from instance settings appear in finding details.""" +# --------------------------------------------------------------------------- +# Cost model (spec 9.10): always None +# --------------------------------------------------------------------------- + + +def test_estimated_monthly_cost_is_none(monkeypatch): + """estimated_monthly_cost_usd is always None — no flat tier lookup table.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("labeled-db", labels={"env": "staging", "owner": "team-a"})], - has_connections=False, + instances=[_make_instance("any-db", tier="db-n1-standard-2")], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert len(findings) == 1 + assert findings[0].estimated_monthly_cost_usd is None + +def test_custom_tier_cost_also_none(monkeypatch): + """Custom tier (db-custom-*) also produces None cost estimate.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("custom-db", tier="db-custom-16-65536")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) assert len(findings) == 1 - assert findings[0].details["labels"] == {"env": "staging", "owner": "team-a"} + assert findings[0].estimated_monthly_cost_usd is None + + +# --------------------------------------------------------------------------- +# Confidence / Risk (spec 9.11, 9.12): always HIGH 
+# --------------------------------------------------------------------------- + + +def test_confidence_always_high(monkeypatch): + """Confidence is always HIGH when a finding is emitted.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("any-db", tier="db-f1-micro")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings[0].confidence == ConfidenceLevel.HIGH + + +def test_risk_always_high(monkeypatch): + """Risk is always HIGH when a finding is emitted.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("any-db")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings[0].risk == RiskLevel.HIGH + + +# --------------------------------------------------------------------------- +# Finding details shape (spec 10.3) +# --------------------------------------------------------------------------- def test_details_include_tier_and_version(monkeypatch): @@ -213,17 +420,96 @@ def test_details_include_tier_and_version(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("typed-db", tier="db-f1-micro", database_version="MYSQL_8_0")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) - assert len(findings) == 1 assert findings[0].details["tier"] == "db-f1-micro" assert findings[0].details["database_version"] == "MYSQL_8_0" +def test_instance_type_in_details(monkeypatch): + """instance_type appears in finding details.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("primary-db")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["instance_type"] == "CLOUD_SQL_INSTANCE" + + +def test_created_at_in_details_is_iso_format(monkeypatch): + 
"""created_at in details is an ISO 8601 string with T separator.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("dated-db")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + created_at = findings[0].details["created_at"] + assert isinstance(created_at, str) + assert "T" in created_at + + +def test_idle_days_threshold_in_details(monkeypatch): + """idle_days_threshold appears in finding details.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("any-db")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock(), idle_days=21) + assert findings[0].details["idle_days_threshold"] == 21 + + +def test_metric_coverage_in_details(monkeypatch): + """metric_coverage is 'FULL' in finding details.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("any-db")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["metric_coverage"] == "FULL" + + +def test_active_connections_max_in_details(monkeypatch): + """active_connections_max is 0.0 in finding details for an idle instance.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("idle-db")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["active_connections_max"] == 0.0 + + +def test_availability_type_in_details(monkeypatch): + """availability_type appears in finding details.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("zonal-db", availability_type="ZONAL")], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert findings[0].details["availability_type"] == "ZONAL" + + +def test_labels_in_details(monkeypatch): + 
"""userLabels from instance settings appear in finding details.""" + _patch_sql_and_monitoring( + monkeypatch, + instances=[_make_instance("labeled-db", labels={"env": "staging", "owner": "team-a"})], + active_connections_max=0.0, + ) + findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + + assert findings[0].details["labels"] == {"env": "staging", "owner": "team-a"} + + # --------------------------------------------------------------------------- -# Storage, HA, backup retention, custom tier parsing +# HA context (spec 9.9) # --------------------------------------------------------------------------- @@ -232,7 +518,7 @@ def test_ha_enabled_in_details_and_signal(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("ha-db", availability_type="REGIONAL")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) @@ -245,7 +531,7 @@ def test_ha_disabled_no_ha_signal(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("zonal-db", availability_type="ZONAL")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) @@ -253,12 +539,17 @@ def test_ha_disabled_no_ha_signal(monkeypatch): assert not any("HA enabled" in s for s in findings[0].evidence.signals_used) +# --------------------------------------------------------------------------- +# Storage / backup context (spec 9.9) +# --------------------------------------------------------------------------- + + def test_storage_size_in_details_and_signal(monkeypatch): """data_disk_size_gb should appear in details and signals when present.""" _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("big-db", data_disk_size_gb=500)], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) @@ 
-271,19 +562,24 @@ def test_backup_retention_in_details(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("backup-db", backup_retained_count=14)], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) assert findings[0].details["backup_retained_count"] == 14 +# --------------------------------------------------------------------------- +# Custom tier CPU/memory parsing +# --------------------------------------------------------------------------- + + def test_custom_tier_cpu_memory_parsed(monkeypatch): """db-custom-{cpu}-{memory_mb} tier should be parsed into cpu_count and memory_gb.""" _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("custom-db", tier="db-custom-2-7680")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) @@ -296,7 +592,7 @@ def test_non_custom_tier_no_cpu_memory(monkeypatch): _patch_sql_and_monitoring( monkeypatch, instances=[_make_instance("std-db", tier="db-n1-standard-2")], - has_connections=False, + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) @@ -305,75 +601,52 @@ def test_non_custom_tier_no_cpu_memory(monkeypatch): # --------------------------------------------------------------------------- -# Cost-based confidence (Point 2) +# Evidence content (spec 10.2) # --------------------------------------------------------------------------- -def test_high_cost_tier_has_high_confidence(monkeypatch): - """Tiers costing > $50/month should produce HIGH confidence findings.""" - _patch_sql_and_monitoring( - monkeypatch, - instances=[_make_instance("expensive-db", tier="db-n1-standard-2")], # $93.10 - has_connections=False, - ) - findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) - - assert findings[0].confidence == ConfidenceLevel.HIGH - - 
-def test_low_cost_tier_has_high_confidence(monkeypatch): - """Zero connections for idle_days is HIGH confidence regardless of cost tier.""" +def test_evidence_discloses_runnable_state(monkeypatch): + """signals_used discloses RUNNABLE state.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("dev-db", tier="db-f1-micro")], # $7.67 - has_connections=False, + instances=[_make_instance("db-1")], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) - - assert findings[0].confidence == ConfidenceLevel.HIGH + assert any("RUNNABLE" in s for s in findings[0].evidence.signals_used) -def test_unknown_tier_has_high_confidence(monkeypatch): - """Unknown tier (no cost estimate) should still produce HIGH confidence.""" +def test_evidence_discloses_metric_coverage_full(monkeypatch): + """signals_used discloses full metric coverage.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("unknown-db", tier="db-custom-16-65536")], - has_connections=False, + instances=[_make_instance("db-1")], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) + assert any("FULL" in s for s in findings[0].evidence.signals_used) - assert findings[0].confidence == ConfidenceLevel.HIGH - - -# --------------------------------------------------------------------------- -# New instance skip (Point 3) -# --------------------------------------------------------------------------- - -def test_new_instance_within_24h_skipped(monkeypatch): - """An instance created less than 24 hours ago should not be flagged.""" - recent = (datetime.now(timezone.utc) - timedelta(hours=2)).strftime("%Y-%m-%dT%H:%M:%SZ") +def test_evidence_discloses_active_connections_max(monkeypatch): + """signals_used discloses the active_connections_max value.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("brand-new-db", create_time=recent)], - 
has_connections=False, + instances=[_make_instance("db-1")], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) - - assert findings == [] + assert any("active_connections_max" in s for s in findings[0].evidence.signals_used) -def test_older_instance_not_skipped_by_create_time(monkeypatch): - """An instance created more than 24 hours ago should still be evaluated.""" - old = (datetime.now(timezone.utc) - timedelta(days=30)).strftime("%Y-%m-%dT%H:%M:%SZ") +def test_evidence_signals_not_checked_present(monkeypatch): + """signals_not_checked is populated with known blind spots.""" _patch_sql_and_monitoring( monkeypatch, - instances=[_make_instance("old-db", create_time=old)], - has_connections=False, + instances=[_make_instance("db-1")], + active_connections_max=0.0, ) findings = find_idle_sql_instances(project_id="proj-1", credentials=MagicMock()) - - assert len(findings) == 1 + assert len(findings[0].evidence.signals_not_checked) > 0 # --------------------------------------------------------------------------- @@ -428,15 +701,292 @@ def test_list_sql_instances_returns_items(monkeypatch): # --------------------------------------------------------------------------- -# _has_connections unit tests +# _query_active_connections unit tests # --------------------------------------------------------------------------- -def test_has_connections_returns_true_on_exception(monkeypatch): - """If monitoring raises any exception, _has_connections returns True (conservative).""" - from cleancloud.providers.gcp.rules.sql_instance_idle import _has_connections +def _make_mock_point(val: int, dt: datetime): + """Create a mock monitoring data point with an integer value and UTC timestamp.""" + p = MagicMock() + p.value.WhichOneof.return_value = "int64_value" + p.value.int64_value = val + p.interval.end_time.seconds = int(dt.timestamp()) + p.interval.end_time.nanos = 0 + return p + + +def 
test_query_active_connections_no_series_returns_none(): + """No time series returned → unresolved coverage → None.""" + from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections + + mock_client = MagicMock() + mock_client.list_time_series.return_value = iter([]) # no series + + now = datetime.now(timezone.utc) + result = _query_active_connections( + mock_client, "proj-1", "db-1", "us-central1", now - timedelta(days=14), now + ) + assert result is None - bad_client = MagicMock() - bad_client.list_time_series.side_effect = Exception("monitoring down") - result = _has_connections(bad_client, "proj-1", "db-1") - assert result is True + +def test_query_active_connections_series_no_points_returns_none(): + """Series present but no points → unusable → None.""" + from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections + + mock_series = MagicMock() + mock_series.points = [] + mock_client = MagicMock() + mock_client.list_time_series.return_value = iter([mock_series]) + + now = datetime.now(timezone.utc) + result = _query_active_connections( + mock_client, "proj-1", "db-1", "us-central1", now - timedelta(days=14), now + ) + assert result is None + + +def test_query_active_connections_zero_returns_zero(): + """Series with all-zero points spanning the window → max = 0.0 → confirmed idle.""" + from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections + + # Use a short window so two edge points cover it without triggering gap or edge checks + now = datetime.now(timezone.utc) + w_start = now - timedelta(minutes=3) + w_end = now + mock_series = MagicMock() + mock_series.points = [ + _make_mock_point(0, w_start + timedelta(seconds=30)), + _make_mock_point(0, w_end - timedelta(seconds=30)), + ] + mock_client = MagicMock() + mock_client.list_time_series.return_value = iter([mock_series]) + + result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end) + assert 
# ===========================================================================
# NOTE(review): this span is the post-image (new-file state) reconstructed
# from a git patch whose newlines were mangled in extraction.  Diff metadata
# and removed (-) lines are excluded; points where a hunk elided unchanged
# context are marked with "[patch hunk boundary: ...]" comments — confirm
# those lines against the repository before relying on this reconstruction.
# ===========================================================================

# --- tests/cleancloud/providers/gcp/test_gcp_sql_instance_idle.py (tail) ---
# (a test ending in `result == 0.0` is cut off above this view and omitted)


def test_query_active_connections_nonzero_returns_max():
    """Series with nonzero points spanning the window → maximum value returned."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(minutes=4)
    w_end = now
    mock_series = MagicMock()
    # Points sit near both window edges so coverage checks pass; 5 is the peak.
    mock_series.points = [
        _make_mock_point(0, w_start + timedelta(seconds=30)),
        _make_mock_point(5, w_start + timedelta(minutes=2)),
        _make_mock_point(2, w_end - timedelta(seconds=30)),
    ]
    mock_client = MagicMock()
    # iter() makes the mock single-pass, like the real monitoring pager.
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end)
    assert result == 5.0


def test_query_active_connections_permission_denied_raises():
    """PermissionDenied from monitoring raises PermissionError."""
    from google.api_core.exceptions import PermissionDenied

    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    mock_client = MagicMock()
    mock_client.list_time_series.side_effect = PermissionDenied("denied")

    now = datetime.now(timezone.utc)
    # The raised error must name the missing IAM permission.
    with pytest.raises(PermissionError, match="monitoring.timeSeries.list"):
        _query_active_connections(
            mock_client, "proj-1", "db-1", "us-central1", now - timedelta(days=14), now
        )


def test_query_active_connections_generic_exception_returns_none():
    """Any unexpected exception from monitoring returns None (skip, don't false-positive)."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    mock_client = MagicMock()
    mock_client.list_time_series.side_effect = Exception("unexpected error")

    now = datetime.now(timezone.utc)
    result = _query_active_connections(
        mock_client, "proj-1", "db-1", "us-central1", now - timedelta(days=14), now
    )
    assert result is None


def test_query_active_connections_aggregates_multiple_series():
    """max is aggregated across all matched series (database label variants)."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(minutes=4)
    w_end = now

    def _make_series(vals_and_times):
        # Local helper: a mock TimeSeries with the given (value, timestamp) points.
        s = MagicMock()
        s.points = [_make_mock_point(v, t) for v, t in vals_and_times]
        return s

    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter(
        [
            _make_series(
                [
                    (0, w_start + timedelta(seconds=30)),
                    (0, w_end - timedelta(seconds=30)),
                ]
            ),  # database=mydb — all zero
            _make_series(
                [
                    (0, w_start + timedelta(seconds=30)),
                    (3, w_end - timedelta(seconds=30)),
                ]
            ),  # database=otherdb — nonzero peak
        ]
    )

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end)
    assert result == 3.0


# ---------------------------------------------------------------------------
# Coverage quality tests (spec 9.6.8–9.6.9)
# ---------------------------------------------------------------------------


def test_query_active_connections_partial_window_start_returns_none():
    """Data starts too late (after window_start + tolerance) → partial window → None."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(days=14)
    w_end = now
    # Single point well past window_start + tolerance
    mock_series = MagicMock()
    mock_series.points = [_make_mock_point(0, w_start + timedelta(days=7))]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end)
    assert result is None


def test_query_active_connections_partial_window_end_returns_none():
    """Data ends too early (before window_end - tolerance) → partial window → None."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(days=14)
    w_end = now
    # Points only up to day 7 — end well before window_end - tolerance
    mock_series = MagicMock()
    mock_series.points = [
        _make_mock_point(0, w_start + timedelta(minutes=1)),
        _make_mock_point(0, w_start + timedelta(days=7)),  # stops at day 7
    ]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end)
    assert result is None


def test_query_active_connections_large_gap_returns_none():
    """Points at start and end but with a large gap in the middle → None."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(days=14)
    w_end = now
    # Points bound the window, but there is a multi-day gap in the middle
    mock_series = MagicMock()
    mock_series.points = [
        _make_mock_point(0, w_start + timedelta(minutes=1)),
        _make_mock_point(0, w_start + timedelta(days=3)),  # 3-day gap after previous
        _make_mock_point(0, w_end - timedelta(minutes=1)),
    ]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end)
    assert result is None


def test_query_active_connections_unreadable_timestamps_returns_none():
    """Any point whose timestamp cannot be parsed → coverage unresolved → None immediately."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    mock_point = MagicMock()
    # Value is readable — the failure must be in the timestamp, not the value type
    mock_point.value.WhichOneof.return_value = "int64_value"
    mock_point.value.int64_value = 0
    # MagicMock for seconds causes datetime.fromtimestamp to raise TypeError
    mock_point.interval.end_time.seconds = MagicMock()
    mock_point.interval.end_time.nanos = 0
    mock_series = MagicMock()
    mock_series.points = [mock_point]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    now = datetime.now(timezone.utc)
    result = _query_active_connections(
        mock_client, "proj-1", "db-1", "us-central1", now - timedelta(days=14), now
    )
    assert result is None


def test_query_active_connections_unrecognized_value_type_returns_none():
    """Point with an unrecognized value type (not int64 or double) → unresolved → None."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(minutes=3)
    mock_point = MagicMock()
    mock_point.value.WhichOneof.return_value = "distribution_value"  # not int64 or double
    mock_point.interval.end_time.seconds = int(w_start.timestamp()) + 30
    mock_point.interval.end_time.nanos = 0
    mock_series = MagicMock()
    mock_series.points = [mock_point]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, now)
    assert result is None


def test_query_active_connections_unset_value_type_returns_none():
    """Point with no value field set (WhichOneof returns None) → unresolved → None."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(minutes=3)
    mock_point = MagicMock()
    mock_point.value.WhichOneof.return_value = None  # no value oneof field set
    mock_point.interval.end_time.seconds = int(w_start.timestamp()) + 30
    mock_point.interval.end_time.nanos = 0
    mock_series = MagicMock()
    mock_series.points = [mock_point]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, now)
    assert result is None


def test_query_active_connections_small_gaps_tolerated():
    """Gaps within tolerance (< 10 min) are allowed — full coverage is accepted."""
    from cleancloud.providers.gcp.rules.sql_instance_idle import _query_active_connections

    now = datetime.now(timezone.utc)
    w_start = now - timedelta(minutes=30)
    w_end = now
    # Points every 5 minutes — gaps are 5 min, within the 10-min tolerance
    times = [w_start + timedelta(minutes=i) for i in range(0, 31, 5)]
    mock_series = MagicMock()
    mock_series.points = [_make_mock_point(0, t) for t in times]
    mock_client = MagicMock()
    mock_client.list_time_series.return_value = iter([mock_series])

    result = _query_active_connections(mock_client, "proj-1", "db-1", "us-central1", w_start, w_end)
    assert result == 0.0


# --- tests/cleancloud/providers/gcp/test_gcp_vm_stopped.py (post-image) ----

"""Unit tests for gcp.compute.vm.stopped rule."""

import warnings
from datetime import datetime, timedelta, timezone
from types import SimpleNamespace
from unittest.mock import MagicMock

# [patch hunk boundary: old lines 6-7 elided — presumably `import pytest`
#  and/or a blank line; confirm against the repository]
from google.api_core.exceptions import NotFound, PermissionDenied

from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.risk import RiskLevel
from cleancloud.providers.gcp.rules.vm_stopped import find_stopped_vms

# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------


def _ts(days_ago: int) -> str:
    """Return a GCP-format RFC3339 timestamp for N days ago."""
    # [patch hunk boundary: the line computing `dt` is elided by the diff]
    return dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")


def _make_disk(size_gb=100, type_="PERSISTENT", boot=False):
    # Positional use at call sites relies on (size_gb, type_) ordering.
    return SimpleNamespace(disk_size_gb=size_gb, type_=type_, boot=boot)


def _make_metadata_item(key: str, value: str):
    return SimpleNamespace(key=key, value=value)


def _make_instance(
    name="test-vm",
    status="TERMINATED",
    last_stop_timestamp=None,
    last_start_timestamp="",
    disks=None,
    machine_type="n1-standard-2",
    labels=None,
    automatic_restart=True,
    network_interfaces=None,
    guest_accelerators=None,
    metadata_items=None,
):
    """Build a minimal Compute Engine instance object."""
    # None means "no metadata at all", distinct from an empty items list.
    metadata = None
    if metadata_items is not None:
        metadata = SimpleNamespace(items=metadata_items)

    return SimpleNamespace(
        name=name,
        status=status,
        # Default to 35 days ago — old enough for the 30-day threshold
        last_stop_timestamp=_ts(35) if last_stop_timestamp is None else last_stop_timestamp,
        last_start_timestamp=last_start_timestamp,
        disks=disks or [],
        machine_type=f"zones/us-central1-a/machineTypes/{machine_type}",
        labels=labels or {},
        scheduling=SimpleNamespace(automatic_restart=automatic_restart),
        network_interfaces=network_interfaces or [],
        guest_accelerators=guest_accelerators or [],
        metadata=metadata,
    )


def _make_scoped_list(instances, warning_code=None):
    """Build a zone-scoped instance list, optionally with a partial-coverage warning."""
    ns = SimpleNamespace(instances=instances)
    if warning_code:
        ns.warning = SimpleNamespace(code=warning_code, message="partial coverage")
    else:
        ns.warning = None
    return ns


def _mock_client(zone_instance_map, monkeypatch):
    """Patch InstancesClient.aggregated_list to return the supplied zone/instance map."""
    mock = MagicMock()
    mock.aggregated_list.return_value = [
        (zone, scoped_list) for zone, scoped_list in zone_instance_map.items()
    ]
    monkeypatch.setattr(
        "cleancloud.providers.gcp.rules.vm_stopped.compute_v1.InstancesClient",
        # [patch hunk boundary: remaining setattr argument(s) elided by the
        #  diff — presumably `MagicMock(return_value=mock),` and a closing `)`]
    )
    return mock


# ---------------------------------------------------------------------------
# Basic detection
# ---------------------------------------------------------------------------


def test_stopped_vm_old_enough_is_flagged(monkeypatch):
    """TERMINATED VM stopped 35 days ago should produce a finding."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("old-vm")])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    # [patch hunk boundary: lines elided — presumably
    #  `assert len(findings) == 1` and `f = findings[0]`; confirm against repo]
    assert f.rule_id == "gcp.compute.vm.stopped"
    assert f.provider == "gcp"
    assert "old-vm" in f.resource_id
    assert f.region == "us-central1"


def test_stopped_status_also_accepted(monkeypatch):
    """status='STOPPED' is also a STOPPED_VM state and must be flagged."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("stopped-vm", status="STOPPED")]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert len(findings) == 1
    assert findings[0].details["raw_status"] == "STOPPED"


def test_running_vm_not_flagged(monkeypatch):
    """RUNNING instance should not be flagged regardless of age."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("live-vm", status="RUNNING")])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


def test_recently_stopped_not_flagged(monkeypatch):
    """TERMINATED VM stopped 5 days ago is below the 30-day threshold."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("new-stop", last_stop_timestamp=_ts(5))]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


def test_staging_vm_not_flagged(monkeypatch):
    """STAGING instance is a transitional state and must be skipped."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("staging-vm", status="STAGING")]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


def test_suspended_vm_not_flagged(monkeypatch):
    """SUSPENDED instance has different billing semantics and must be skipped."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("suspended-vm", status="SUSPENDED")]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


# ---------------------------------------------------------------------------
# Stop timestamp contract (spec 8.6, 9.5)
# ---------------------------------------------------------------------------


def test_missing_stop_timestamp_skips(monkeypatch):
    """TERMINATED VM with no lastStopTimestamp must be skipped, not emitted."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("mystery-vm", last_stop_timestamp="")]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


def test_unparsable_stop_timestamp_skips(monkeypatch):
    """TERMINATED VM with an unparsable lastStopTimestamp must be skipped."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("bad-ts-vm", last_stop_timestamp="not-a-date")]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


# ---------------------------------------------------------------------------
# Zone scope contract (spec 9.2)
# ---------------------------------------------------------------------------


def test_non_zone_scope_skipped(monkeypatch):
    """Scope keys not in 'zones/ZONE' form must be skipped."""
    _mock_client(
        {
            # 'global/' is not a zone scope
            "global/": _make_scoped_list([_make_instance("global-vm")]),
            "zones/us-central1-a": _make_scoped_list([_make_instance("zone-vm")]),
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert len(findings) == 1
    assert "zone-vm" in findings[0].resource_id


def test_regions_scope_skipped(monkeypatch):
    """'regions/...' scope key must be skipped (not a zone scope)."""
    _mock_client(
        {
            "regions/us-central1": _make_scoped_list([_make_instance("region-vm")]),
            "zones/us-east1-b": _make_scoped_list([_make_instance("zone-vm")]),
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert len(findings) == 1
    assert "zone-vm" in findings[0].resource_id


# ---------------------------------------------------------------------------
# Region derivation and filter (spec 9.2)
# ---------------------------------------------------------------------------


def test_region_derived_from_zone(monkeypatch):
    """Region in the finding is derived from the zone (drop trailing letter)."""
    _mock_client(
        {"zones/europe-west1-b": _make_scoped_list([_make_instance("eu-vm")])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings[0].region == "europe-west1"


def test_region_filter_matches(monkeypatch):
    """Only instances in the matching region are flagged."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list([_make_instance("central-vm")]),
            "zones/eu-west1-b": _make_scoped_list([_make_instance("eu-vm")]),
        },
        monkeypatch,
    )
    findings = find_stopped_vms(
        project_id="proj-1", credentials=MagicMock(), region_filter="eu-west1"
    )
    assert len(findings) == 1
    assert "eu-vm" in findings[0].resource_id


def test_region_filter_no_match_returns_empty(monkeypatch):
    """Region filter that matches no zone returns no findings."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])},
        monkeypatch,
    )
    findings = find_stopped_vms(
        project_id="proj-1", credentials=MagicMock(), region_filter="asia-east1"
    )
    assert findings == []


def test_unknown_region_with_filter_skips(monkeypatch):
    """Non-standard zone produces 'unknown' region; filter cannot evaluate → skip."""
    # Zone 'custom-a' → region 'custom' (no dash → falls through to 'unknown')
    # NOTE(review): comment above is terse — presumably the rule strips the
    # trailing '-a', and 'custom' fails region-shape validation, yielding
    # 'unknown'; confirm against the region-derivation helper in vm_stopped.
    _mock_client(
        {"zones/custom-a": _make_scoped_list([_make_instance("weird-vm")])},
        monkeypatch,
    )
    findings = find_stopped_vms(
        project_id="proj-1", credentials=MagicMock(), region_filter="custom"
    )
    assert findings == []


def test_unknown_region_with_filter_emits_warning(monkeypatch):
    """Unknown region with active filter emits a UserWarning naming the zone and filter."""
    _mock_client(
        {"zones/custom-a": _make_scoped_list([_make_instance("weird-vm")])},
        monkeypatch,
    )
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        find_stopped_vms(project_id="proj-1", credentials=MagicMock(), region_filter="custom")

    assert any(
        issubclass(w.category, UserWarning)
        and "custom-a" in str(w.message)
        and "region_filter" in str(w.message)
        for w in caught
    )


def test_unknown_region_without_filter_emits(monkeypatch):
    """Non-standard zone with 'unknown' region still emits when no filter is set."""
    _mock_client(
        {"zones/custom-a": _make_scoped_list([_make_instance("weird-vm")])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert len(findings) == 1
    assert findings[0].region == "unknown"


# ---------------------------------------------------------------------------
# MIG exclusion (spec 9.4)
# ---------------------------------------------------------------------------


def test_mig_member_skipped(monkeypatch):
    """Instance with 'created-by' referencing instanceGroupManagers must be skipped."""
    mig_instance = _make_instance(
        "mig-vm",
        metadata_items=[
            _make_metadata_item(
                "created-by",
                "projects/123/zones/us-central1-a/instanceGroupManagers/my-mig",
            )
        ],
    )
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([mig_instance])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert findings == []


def test_mig_exclusion_requires_exact_proof(monkeypatch):
    """'created-by' without 'instanceGroupManagers/' must NOT trigger MIG exclusion."""
    non_mig_instance = _make_instance(
        "standalone-vm",
        metadata_items=[_make_metadata_item("created-by", "some-other-resource/my-tool")],
    )
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([non_mig_instance])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert len(findings) == 1


def test_no_metadata_not_excluded(monkeypatch):
    """Instance with no metadata at all must not be excluded by MIG check."""
    instance = _make_instance("no-meta-vm", metadata_items=None)
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([instance])},
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock())
    assert len(findings) == 1


# ---------------------------------------------------------------------------
# Custom threshold
# ---------------------------------------------------------------------------


def test_custom_max_age_days(monkeypatch):
    """max_age_days parameter controls the stop-age threshold."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", last_stop_timestamp=_ts(10))]
            )
        },
        monkeypatch,
    )
    # With threshold=7, stopped 10 days ago → flagged
    assert len(find_stopped_vms(project_id="p", credentials=MagicMock(), max_age_days=7)) == 1
    # With threshold=30, stopped 10 days ago → not flagged
    assert find_stopped_vms(project_id="p", credentials=MagicMock(), max_age_days=30) == []


# ---------------------------------------------------------------------------
# Confidence (spec 9.7)
# ---------------------------------------------------------------------------


def test_short_stopped_vm_medium_confidence(monkeypatch):
    """VM stopped 35 days ago (< 90 days) should have MEDIUM confidence."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", last_stop_timestamp=_ts(35))]
            )
        },
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].confidence
        == ConfidenceLevel.MEDIUM
    )


def test_long_stopped_vm_high_confidence(monkeypatch):
    """VM stopped 95 days ago (>= 90 days) should have HIGH confidence."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", last_stop_timestamp=_ts(95))]
            )
        },
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].confidence
        == ConfidenceLevel.HIGH
    )


# ---------------------------------------------------------------------------
# Risk (spec 9.8)
# ---------------------------------------------------------------------------


def test_risk_always_medium(monkeypatch):
    """Risk is always MEDIUM when a finding is emitted."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])},
        monkeypatch,
    )
    assert find_stopped_vms(project_id="p", credentials=MagicMock())[0].risk == RiskLevel.MEDIUM


# ---------------------------------------------------------------------------
# Cost model (spec 9.6)
# ---------------------------------------------------------------------------


def test_estimated_monthly_cost_always_none(monkeypatch):
    """estimated_monthly_cost_usd is always None regardless of disk size."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [
                    _make_instance(
                        "big-disk-vm",
                        disks=[_make_disk(size_gb=1000)],
                    )
                ]
            )
        },
        monkeypatch,
    )
    findings = find_stopped_vms(project_id="p", credentials=MagicMock())
    assert findings[0].estimated_monthly_cost_usd is None


# ---------------------------------------------------------------------------
# Details shape (spec 10.3)
# ---------------------------------------------------------------------------


def test_raw_status_in_details(monkeypatch):
    """raw_status must appear in details with the exact API value."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", status="TERMINATED")])},
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["raw_status"]
        == "TERMINATED"
    )


def test_stop_age_days_in_details(monkeypatch):
    """stop_age_days must appear in details."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", last_stop_timestamp=_ts(40))]
            )
        },
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["stop_age_days"] >= 40
    )


def test_max_age_days_threshold_in_details(monkeypatch):
    """max_age_days_threshold must appear in details."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])},
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock(), max_age_days=14)[0].details[
            "max_age_days_threshold"
        ]
        == 14
    )


def test_last_stop_timestamp_in_details_is_iso(monkeypatch):
    """last_stop_timestamp in details is an ISO 8601 string."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])},
        monkeypatch,
    )
    ts = find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["last_stop_timestamp"]
    assert isinstance(ts, str) and "T" in ts


def test_mig_membership_false_in_details(monkeypatch):
    """mig_membership is False for non-MIG instances that are emitted."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])},
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["mig_membership"]
        is False
    )


def test_persistent_disk_count_in_details(monkeypatch):
    """persistent_disk_count counts only PERSISTENT disks."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [
                    _make_instance(
                        "vm",
                        disks=[
                            _make_disk(100, "PERSISTENT"),
                            _make_disk(200, "PERSISTENT"),
                            _make_disk(375, "SCRATCH"),
                        ],
                    )
                ]
            )
        },
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details[
            "persistent_disk_count"
        ]
        == 2
    )


def test_persistent_disk_total_gb_in_details(monkeypatch):
    """persistent_disk_total_gb sums only PERSISTENT disk sizes."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [
                    _make_instance(
                        "vm",
                        disks=[
                            _make_disk(200, "PERSISTENT"),
                            _make_disk(100, "PERSISTENT"),
                            _make_disk(375, "SCRATCH"),  # excluded
                        ],
                    )
                ]
            )
        },
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details[
            "persistent_disk_total_gb"
        ]
        == 300
    )


def test_disk_kinds_present_in_details(monkeypatch):
    """disk_kinds_present lists all distinct attached disk kinds."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [
                    _make_instance(
                        "vm",
                        disks=[
                            _make_disk(100, "PERSISTENT"),
                            _make_disk(375, "SCRATCH"),
                        ],
                    )
                ]
            )
        },
        monkeypatch,
    )
    kinds = find_stopped_vms(project_id="p", credentials=MagicMock())[0].details[
        "disk_kinds_present"
    ]
    assert sorted(kinds) == ["PERSISTENT", "SCRATCH"]


def test_boot_disk_count_in_details_and_signal(monkeypatch):
    """Boot disk presence appears in details and signals."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [
                    _make_instance(
                        "vm",
                        disks=[_make_disk(50, boot=True), _make_disk(100)],
                    )
                ]
            )
        },
        monkeypatch,
    )
    f = find_stopped_vms(project_id="p", credentials=MagicMock())[0]
    assert f.details["boot_disk_count"] == 1
    assert any("Boot disk" in s for s in f.evidence.signals_used)


def test_no_boot_disk_no_boot_signal(monkeypatch):
    """VMs with no boot disk should not emit a boot disk signal."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", disks=[_make_disk(100, boot=False)])]
            )
        },
        monkeypatch,
    )
    f = find_stopped_vms(project_id="p", credentials=MagicMock())[0]
    assert f.details["boot_disk_count"] == 0
    assert not any("Boot disk" in s for s in f.evidence.signals_used)


def test_external_nat_ip_present_in_details_and_signal(monkeypatch):
    """external_nat_ip_present=True appears in details and signals."""
    nic = SimpleNamespace(access_configs=[SimpleNamespace(nat_ip="34.1.2.3")])
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", network_interfaces=[nic])]
            )
        },
        monkeypatch,
    )
    f = find_stopped_vms(project_id="p", credentials=MagicMock())[0]
    assert f.details["external_nat_ip_present"] is True
    assert any("External NAT IP" in s for s in f.evidence.signals_used)


def test_external_nat_ip_absent_in_details(monkeypatch):
    """external_nat_ip_present=False when no access config has a natIP."""
    nic = SimpleNamespace(access_configs=[SimpleNamespace(nat_ip="")])
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", network_interfaces=[nic])]
            )
        },
        monkeypatch,
    )
    f = find_stopped_vms(project_id="p", credentials=MagicMock())[0]
    assert f.details["external_nat_ip_present"] is False
    assert not any("External NAT IP" in s for s in f.evidence.signals_used)


def test_gpu_attached_in_details_and_signal(monkeypatch):
    """gpu_attached=True appears in details and signals when accelerators present."""
    accel = SimpleNamespace(accelerator_count=1, accelerator_type="nvidia-tesla-t4")
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", guest_accelerators=[accel])]
            )
        },
        monkeypatch,
    )
    f = find_stopped_vms(project_id="p", credentials=MagicMock())[0]
    assert f.details["gpu_attached"] is True
    assert any("GPU" in s for s in f.evidence.signals_used)


def test_gpu_not_attached_in_details(monkeypatch):
    """gpu_attached=False when no accelerators are present."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", guest_accelerators=[])])},
        monkeypatch,
    )
    f = find_stopped_vms(project_id="p", credentials=MagicMock())[0]
    assert f.details["gpu_attached"] is False
    assert not any("GPU" in s for s in f.evidence.signals_used)


def test_last_start_timestamp_in_details_when_present(monkeypatch):
    """last_start_timestamp appears in details when non-empty."""
    ts = "2024-01-01T10:00:00Z"
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", last_start_timestamp=ts)])},
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["last_start_timestamp"]
        == ts
    )


def test_last_start_timestamp_absent_when_empty(monkeypatch):
    """last_start_timestamp is absent from details when empty."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", last_start_timestamp="")])},
        monkeypatch,
    )
    assert (
        "last_start_timestamp"
        not in find_stopped_vms(project_id="p", credentials=MagicMock())[0].details
    )


def test_automatic_restart_in_details_when_present(monkeypatch):
    """automatic_restart appears in details when the scheduling field is present."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", automatic_restart=False)])},
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["automatic_restart"]
        is False
    )


def test_machine_type_parsed_from_url(monkeypatch):
    """machine_type in details is the final URL segment, not the full resource path."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", machine_type="e2-medium")]
            )
        },
        monkeypatch,
    )
    assert (
        find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["machine_type"]
        == "e2-medium"
    )


def test_labels_in_details(monkeypatch):
    """Instance labels appear in details."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", labels={"env": "staging", "owner": "team-a"})]
            )
        },
        monkeypatch,
    )
    assert find_stopped_vms(project_id="p", credentials=MagicMock())[0].details["labels"] == {
        "env": "staging",
        "owner": "team-a",
    }


# ---------------------------------------------------------------------------
# Evidence shape (spec 10.2)
# ---------------------------------------------------------------------------


def test_evidence_discloses_stopped_state(monkeypatch):
    """signals_used discloses the STOPPED_VM lifecycle state."""
    _mock_client(
        {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])},
        monkeypatch,
    )
    sigs = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_used
    assert any("STOPPED_VM" in s for s in sigs)


def test_evidence_discloses_stop_age_and_threshold(monkeypatch):
    """signals_used discloses stop age in days and the threshold."""
    _mock_client(
        {
            "zones/us-central1-a": _make_scoped_list(
                [_make_instance("vm", last_stop_timestamp=_ts(40))]
            )
        },
        monkeypatch,
    )
    sigs = find_stopped_vms(project_id="p", credentials=MagicMock(), max_age_days=30)[
        0
    ].evidence.signals_used
    assert any("40" in s for s in sigs)
    assert any("30" in s for s in sigs)

# (a further `def ` is cut off below this view and omitted)
test_evidence_discloses_disk_count_and_size(monkeypatch): + """signals_used discloses persistent disk count and total size.""" + _mock_client( + { + "zones/us-central1-a": _make_scoped_list( + [_make_instance("vm", disks=[_make_disk(200), _make_disk(100)])] + ) + }, + monkeypatch, + ) + sigs = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_used + assert any("300" in s for s in sigs) # total GB + assert any("2" in s for s in sigs) # disk count + + +def test_evidence_no_cost_estimate_in_signals(monkeypatch): + """signals_used must not include a flat disk cost estimate.""" + _mock_client( + {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", disks=[_make_disk(500)])])}, + monkeypatch, + ) + sigs = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_used + assert not any("$/month" in s or "per GB" in s or "0.04" in s for s in sigs) + + +def test_evidence_disk_kinds_in_signals(monkeypatch): + """signals_used discloses attached disk kinds when present.""" + _mock_client( + { + "zones/us-central1-a": _make_scoped_list( + [ + _make_instance( + "vm", disks=[_make_disk(100, "PERSISTENT"), _make_disk(375, "SCRATCH")] + ) + ] + ) + }, + monkeypatch, + ) + sigs = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_used + assert any("SCRATCH" in s for s in sigs) + + +def test_evidence_automatic_restart_in_signals(monkeypatch): + """signals_used discloses automaticRestart context when scheduling present.""" + _mock_client( + {"zones/us-central1-a": _make_scoped_list([_make_instance("vm", automatic_restart=False)])}, + monkeypatch, + ) + sigs = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_used + assert any("automaticRestart" in s for s in sigs) + + +def test_signals_not_checked_includes_blind_spots(monkeypatch): + """signals_not_checked covers the required blind spots for a normal (known-region) finding.""" + _mock_client( + {"zones/us-central1-a": 
_make_scoped_list([_make_instance("vm")])}, + monkeypatch, + ) + snc = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_not_checked + combined = " ".join(snc) + assert "missing_last_stop_timestamp" in combined + assert len(snc) >= 4 + + +def test_region_unparseable_absent_when_region_known(monkeypatch): + """region_unparseable must NOT appear in signals_not_checked when region is known.""" + _mock_client( + {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])}, + monkeypatch, + ) + snc = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_not_checked + combined = " ".join(snc) + assert "region_unparseable" not in combined + + +def test_region_unparseable_in_signals_not_checked_only_when_unknown(monkeypatch): + """region_unparseable is conditionally added to signals_not_checked only when region is unknown.""" + # Zone 'zones/badzone' → zone name 'badzone' → no dash in parts[0] → region 'unknown' + _mock_client( + {"zones/badzone": _make_scoped_list([_make_instance("vm")])}, + monkeypatch, + ) + snc = find_stopped_vms(project_id="p", credentials=MagicMock())[0].evidence.signals_not_checked + combined = " ".join(snc) + assert "region_unparseable" in combined + + +def test_extra_path_in_zone_scope_skipped(monkeypatch): + """zones/ZONE/extra scope key must be rejected — only exact zones/ZONE is valid.""" + _mock_client( + { + "zones/us-central1-a/extra": _make_scoped_list([_make_instance("vm")]), + }, + monkeypatch, + ) + findings = find_stopped_vms(project_id="p", credentials=MagicMock()) + assert findings == [] + + +def test_malformed_instance_skipped_not_aborted(monkeypatch): + """A malformed instance record is skipped with a warning; valid siblings still emit.""" + + class _BrokenInstance: + name = "broken-vm" + + @property + def status(self): + raise AttributeError("simulated malformed record") + + _mock_client( + {"zones/us-central1-a": _make_scoped_list([_BrokenInstance(), 
_make_instance("good-vm")])}, + monkeypatch, + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + findings = find_stopped_vms(project_id="p", credentials=MagicMock()) + + assert len(findings) == 1 + assert findings[0].details["instance_name"] == "good-vm" + assert any( + issubclass(w.category, UserWarning) and "malformed instance" in str(w.message) + for w in caught + ) + + +# --------------------------------------------------------------------------- +# Rule identity and resource shape +# --------------------------------------------------------------------------- + + +def test_rule_id_and_provider(monkeypatch): + """rule_id and provider are correct.""" + _mock_client( + {"zones/us-central1-a": _make_scoped_list([_make_instance("vm")])}, + monkeypatch, + ) + f = find_stopped_vms(project_id="proj-1", credentials=MagicMock())[0] + assert f.rule_id == "gcp.compute.vm.stopped" + assert f.provider == "gcp" + assert f.resource_type == "gcp.compute.instance" + + +def test_resource_id_canonical_path(monkeypatch): + """resource_id uses the canonical projects/zones/instances path.""" + _mock_client( + {"zones/us-central1-a": _make_scoped_list([_make_instance("my-vm")])}, + monkeypatch, + ) + rid = find_stopped_vms(project_id="proj-1", credentials=MagicMock())[0].resource_id + assert rid == "projects/proj-1/zones/us-central1-a/instances/my-vm" + + +# --------------------------------------------------------------------------- +# Failure behavior (spec 9.9) +# --------------------------------------------------------------------------- def test_permission_denied_raises_permission_error(monkeypatch): - """PermissionDenied during aggregated_list should become PermissionError.""" + """PermissionDenied during aggregated_list should surface as PermissionError.""" mock = MagicMock() mock.aggregated_list.side_effect = PermissionDenied("compute.instances.list denied") monkeypatch.setattr( @@ -388,3 +994,23 @@ def 
test_not_found_returns_empty(monkeypatch): ) findings = find_stopped_vms(project_id="proj-1", credentials=MagicMock()) assert findings == [] + + +def test_partial_coverage_warning_emitted(monkeypatch): + """Partial-coverage scope warning is surfaced via warnings.warn.""" + _mock_client( + { + "zones/us-central1-a": _make_scoped_list( + [_make_instance("vm")], warning_code="NO_RESULTS_ON_PAGE" + ) + }, + monkeypatch, + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + find_stopped_vms(project_id="proj-1", credentials=MagicMock()) + + assert any( + issubclass(w.category, UserWarning) and "partial coverage" in str(w.message).lower() + for w in caught + ) diff --git a/tests/e2e/gcp/test_gcp_ai_rules_smoke.py b/tests/e2e/gcp/test_gcp_ai_rules_smoke.py index fd987c7..1f3d8b8 100644 --- a/tests/e2e/gcp/test_gcp_ai_rules_smoke.py +++ b/tests/e2e/gcp/test_gcp_ai_rules_smoke.py @@ -4,15 +4,15 @@ from google.auth.transport.requests import AuthorizedSession from cleancloud.core.finding import Finding -from cleancloud.providers.gcp.rules.featurestore_idle import find_idle_featurestores -from cleancloud.providers.gcp.rules.tpu_idle import find_idle_tpu_nodes -from cleancloud.providers.gcp.rules.vertex_endpoint_idle import ( +from cleancloud.providers.gcp.rules.ai.featurestore_idle import find_idle_featurestores +from cleancloud.providers.gcp.rules.ai.tpu_idle import find_idle_tpu_nodes +from cleancloud.providers.gcp.rules.ai.vertex_endpoint_idle import ( find_idle_vertex_endpoints, ) -from cleancloud.providers.gcp.rules.vertex_training_job_long_running import ( +from cleancloud.providers.gcp.rules.ai.vertex_training_job_long_running import ( find_long_running_vertex_training_jobs, ) -from cleancloud.providers.gcp.rules.workbench_idle import find_idle_workbench_instances +from cleancloud.providers.gcp.rules.ai.workbench_idle import find_idle_workbench_instances from cleancloud.providers.gcp.session import create_gcp_session 
_GCP_AI_RULE_IDS = {