Skip to content

Commit 0d48944

Browse files
authored
Azure rule hardening - Part 2 (#163)
* azure.app_service.idle
* azure.app_service_plan.empty
* azure.container_registry.unused
* azure.compute.snapshot.old
1 parent 875b941 commit 0d48944

42 files changed

Lines changed: 5293 additions & 1298 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

cleancloud/providers/aws/rules/ai/sagemaker_training_job_long_running.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def find_long_running_sagemaker_training_jobs(
279279
) -> List[Finding]:
280280
sagemaker = session.client("sagemaker", region_name=region)
281281

282-
# Spec §8: paginate WITHOUT StatusEquals — filter InProgress client-side.
282+
# Spec 8: paginate WITHOUT StatusEquals — filter InProgress client-side.
283283
# AWS documents that StatusEquals + MaxResults filters after paging, which can
284284
# silently miss InProgress jobs when pagination is truncated.
285285
try:
@@ -360,7 +360,7 @@ def find_long_running_sagemaker_training_jobs(
360360
active_training_hours = None
361361
elapsed_runtime_hours = nl["job_age_hours"]
362362

363-
# applicable_runtime_limit_seconds per spec §3
363+
# applicable_runtime_limit_seconds per spec 3
364364
if nd["enable_managed_spot_training"] and nd["max_wait_time_seconds"] is not None:
365365
applicable_runtime_limit_seconds = nd["max_wait_time_seconds"]
366366
elif training_start_time is not None and nd["max_runtime_seconds"] is not None:

cleancloud/providers/aws/rules/ami_old.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def _evaluate_ami(
104104
"""
105105
Evaluate one AMI against spec v4. Returns a Finding or None.
106106
107-
Mandatory evaluation order (spec §6):
107+
Mandatory evaluation order (spec 6):
108108
1. Parse + normalize
109109
2. Check deprecation override
110110
3. Fetch best-effort signals
@@ -181,7 +181,7 @@ def _evaluate_ami(
181181

182182
# ── 4. EXCLUSION_RULES ────────────────────────────────────────────────
183183
# Rule: recently launched (lastLaunchedTime must exist and be recent).
184-
# Missing lastLaunchedTime does NOT trigger exclusion (spec §5.A).
184+
# Missing lastLaunchedTime does NOT trigger exclusion (spec 5.A).
185185
if days_since_launched is not None and days_since_launched < _RECENTLY_ACTIVE_DAYS:
186186
return None
187187

@@ -201,7 +201,7 @@ def _evaluate_ami(
201201
return None
202202

203203
# Conservative: unknown active-instance state + borderline score → skip.
204-
# Absence of instance check ≠ absence of instances (spec §13).
204+
# Absence of instance check ≠ absence of instances (spec 13).
205205
if instance_check_failed and score == 1:
206206
return None
207207

@@ -222,10 +222,10 @@ def _evaluate_ami(
222222

223223
# ── 8. Risk ───────────────────────────────────────────────────────────
224224
# MEDIUM: non-deprecated + BOTH signals (score 2).
225-
# Guardrail: score 2 → risk MUST be >= MEDIUM (spec §9).
225+
# Guardrail: score 2 → risk MUST be >= MEDIUM (spec 9).
226226
risk = RiskLevel.MEDIUM if score == 2 else RiskLevel.LOW
227227

228-
# Title (spec §16 + §13):
228+
# Title (spec 16 + 13):
229229
# - Never label "Unused" if active instances exist.
230230
# - Never label "Unused" if active-instance state is UNKNOWN (check failed) —
231231
# missing visibility ≠ "no instances" ≠ "unused".
@@ -364,15 +364,15 @@ def _build_evidence( # noqa: PLR0913
364364
contextual_downgrade: bool,
365365
) -> Evidence:
366366
"""
367-
Build Evidence per spec §15.
367+
Build Evidence per spec 15.
368368
All fields must exist; null is allowed but fields must not be omitted.
369-
Evaluation path must be exactly "deprecated" or "scored" (spec §17).
369+
Evaluation path must be exactly "deprecated" or "scored" (spec 17).
370370
signals_not_checked: permission/visibility gaps first, then conceptual blind spots.
371371
"""
372372
signals: List[str] = []
373373
not_checked: List[str] = []
374374

375-
# evaluation_path (spec §17)
375+
# evaluation_path (spec 17)
376376
signals.append(f"evaluation_path: {evaluation_path}")
377377

378378
# age + state
@@ -515,7 +515,7 @@ def _check_active_instances(ec2, ami_id: str) -> Tuple[bool, bool]:
515515
"""
516516
Return (instances_found, check_failed).
517517
Existence check — MaxResults=5, EC2 filters server-side.
518-
CRITICAL (spec §13): check_failed ≠ "no instances" — treat result as UNKNOWN.
518+
CRITICAL (spec 13): check_failed ≠ "no instances" — treat result as UNKNOWN.
519519
"""
520520
try:
521521
resp = ec2.describe_instances(
@@ -534,7 +534,7 @@ def _check_active_instances(ec2, ami_id: str) -> Tuple[bool, bool]:
534534
def _build_lt_index(ec2) -> Tuple[Dict[str, List[str]], bool]:
535535
"""
536536
Build {ami_id: [lt_ids]} by checking $Default + $Latest versions of each LT.
537-
Per spec §11: "prefer $Default + $Latest; full traversal optional."
537+
Per spec 11: "prefer $Default + $Latest; full traversal optional."
538538
539539
Phase 1: list all LT IDs (with guard at _LT_INDEX_GUARD).
540540
Phase 2: per-LT, query $Default + $Latest versions only (no pagination needed).

cleancloud/providers/aws/rules/cloudwatch_logs_no_retention.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@
4040
from cleancloud.core.risk import RiskLevel
4141

4242
# Approximate CloudWatch Logs storage cost per GB-month (us-east-1, 2024).
43-
# Informational only — must NOT influence detection or confidence (spec §9).
43+
# Informational only — must NOT influence detection or confidence (spec 9).
4444
_STORAGE_COST_PER_GB_APPROX = 0.03
4545

46-
# Risk thresholds by stored size (spec §8)
46+
# Risk thresholds by stored size (spec 8)
4747
_HIGH_RISK_GB = 1.0 # ≥ 1 GB → HIGH; < 1 GB → MEDIUM; 0 bytes → LOW
4848

49-
# Only these classes are eligible (spec §2). Allowlist — unknown/missing class is NOT in scope.
49+
# Only these classes are eligible (spec 2). Allowlist — unknown/missing class is NOT in scope.
5050
_ELIGIBLE_CLASSES = {"STANDARD", "INFREQUENT_ACCESS"}
5151

5252

@@ -62,16 +62,16 @@ def find_cloudwatch_logs_no_retention(
6262

6363
for page in paginator.paginate():
6464
for lg in page.get("logGroups", []):
65-
# EXCLUSION: malformed record (spec §2)
65+
# EXCLUSION: malformed record (spec 2)
6666
if not lg.get("logGroupName"):
6767
continue
6868

69-
# EXCLUSION: only STANDARD and INFREQUENT_ACCESS are in scope (spec §2, §4A).
69+
# EXCLUSION: only STANDARD and INFREQUENT_ACCESS are in scope (spec 2, 4A).
7070
# DELIVERY is service-managed; unknown/missing class is not eligible.
7171
if lg.get("logGroupClass") not in _ELIGIBLE_CLASSES:
7272
continue
7373

74-
# EXCLUSION: retention policy is set — key presence check, not value check (spec §4A).
74+
# EXCLUSION: retention policy is set — key presence check, not value check (spec 4A).
7575
# "retentionInDays is not present in the returned log group object" means
7676
# key absent, not value null. An explicit null would still mean the key was
7777
# returned and should be treated as set.
@@ -91,22 +91,22 @@ def find_cloudwatch_logs_no_retention(
9191
creation_time = None
9292
age_days = None
9393

94-
# storedBytes is a non-billing, eventually-consistent storage metric (spec §3, §9).
94+
# storedBytes is a non-billing, eventually-consistent storage metric (spec 3, 9).
9595
# It must NOT be used as an activity signal.
9696
stored_bytes: Optional[int] = lg.get("storedBytes")
9797
stored_gb: Optional[float] = (
9898
(stored_bytes / (1024**3)) if stored_bytes is not None else None
9999
)
100100

101-
# Risk is proportional to stored size as a proxy for current storage exposure (spec §8)
101+
# Risk is proportional to stored size as a proxy for current storage exposure (spec 8)
102102
if stored_gb is not None and stored_gb >= _HIGH_RISK_GB:
103103
risk = RiskLevel.HIGH
104104
elif stored_bytes is not None and stored_bytes > 0:
105105
risk = RiskLevel.MEDIUM
106106
else:
107107
risk = RiskLevel.LOW
108108

109-
# Cost estimate — informational only (spec §9)
109+
# Cost estimate — informational only (spec 9)
110110
monthly_storage_cost: Optional[float] = None
111111
if stored_bytes is not None and stored_bytes > 0 and stored_gb is not None:
112112
monthly_storage_cost = round(stored_gb * _STORAGE_COST_PER_GB_APPROX, 2)

cleancloud/providers/aws/rules/ebs_snapshot_old.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
_DEFAULT_MAX_AGE_DAYS: int = 90
4747

48-
# Tag key prefix that indicates explicit AWS Backup management (spec §4, §5A.6).
48+
# Tag key prefix that indicates explicit AWS Backup management (spec 4, 5A.6).
4949
# Only aws:backup: is defined by this spec. DLM is not in scope.
5050
_BACKUP_TAG_PREFIX: str = "aws:backup:"
5151

@@ -95,7 +95,7 @@ def _check_external_sharing(ec2, snap_id: str) -> Tuple[bool, bool]:
9595

9696

9797
def _is_backup_managed(snap: dict) -> bool:
98-
"""Return True if the snapshot has an explicit aws:backup: tag (spec §4, §5A.6).
98+
"""Return True if the snapshot has an explicit aws:backup: tag (spec 4, 5A.6).
9999
100100
Only tag-based detection; full AWS Backup API integration is not in this spec.
101101
A negative result means UNKNOWN (no tag evidence found), not confirmed non-Backup.
@@ -113,7 +113,7 @@ def find_old_ebs_snapshots(
113113
) -> List[Finding]:
114114
ec2 = session.client("ec2", region_name=region)
115115

116-
# Build AMI snapshot index before evaluating snapshots (spec §5A.4, §6, §10).
116+
# Build AMI snapshot index before evaluating snapshots (spec 5A.4, 6, 10).
117117
# If this fails, AMI linkage cannot be verified → all candidates are skipped.
118118
ami_snapshot_ids, ami_index_failed = _build_ami_snapshot_index(ec2)
119119

@@ -127,42 +127,42 @@ def find_old_ebs_snapshots(
127127
snap_id = snap.get("SnapshotId")
128128
start_time = snap.get("StartTime")
129129

130-
# EXCLUSION: malformed record (spec §3)
130+
# EXCLUSION: malformed record (spec 3)
131131
if not snap_id or start_time is None:
132132
continue
133133

134-
# EXCLUSION: status != completed (spec §5A.1)
134+
# EXCLUSION: status != completed (spec 5A.1)
135135
if snap.get("State") != "completed":
136136
continue
137137

138-
# EXCLUSION: non-standard storage tier (spec §5A.2)
138+
# EXCLUSION: non-standard storage tier (spec 5A.2)
139139
# StorageTier absent → treated as standard per AWS default.
140140
storage_tier = snap.get("StorageTier", "standard")
141141
if storage_tier != "standard":
142142
continue
143143

144-
# EXCLUSION: age threshold (spec §5A.3)
144+
# EXCLUSION: age threshold (spec 5A.3)
145145
age_days = (now - start_time).days
146146
if age_days < max_age_days:
147147
continue
148148

149-
# EXCLUSION: AMI linkage (spec §5A.4, §10)
149+
# EXCLUSION: AMI linkage (spec 5A.4, 10)
150150
# If the index build failed, AMI linkage cannot be verified → SKIP.
151151
# Never treat missing visibility as "no AMI links".
152152
if ami_index_failed:
153153
continue
154154
if snap_id in ami_snapshot_ids:
155155
continue
156156

157-
# EXCLUSION: external sharing (spec §5A.5, §10)
157+
# EXCLUSION: external sharing (spec 5A.5, 10)
158158
# Per-snapshot check. If the check fails → SKIP that snapshot.
159159
shared_externally, sharing_check_failed = _check_external_sharing(ec2, snap_id)
160160
if sharing_check_failed:
161161
continue
162162
if shared_externally:
163163
continue
164164

165-
# EXCLUSION: explicit AWS Backup-managed (spec §5A.6)
165+
# EXCLUSION: explicit AWS Backup-managed (spec 5A.6)
166166
# Tag-based heuristic (aws:backup: prefix only). Only explicit True suppresses;
167167
# unknown (no tag evidence) does not block.
168168
if _is_backup_managed(snap):
@@ -201,7 +201,7 @@ def find_old_ebs_snapshots(
201201
resource_type="aws.ebs.snapshot",
202202
resource_id=snap_id,
203203
region=region,
204-
estimated_monthly_cost_usd=None, # spec §9: no cost from volumeSize
204+
estimated_monthly_cost_usd=None, # spec 9: no cost from volumeSize
205205
title="Old EBS snapshot review candidate",
206206
summary=(
207207
f"EBS snapshot is {age_days} days old "

cleancloud/providers/aws/rules/ebs_unattached.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ def find_unattached_ebs_volumes(
264264
resource_type="aws.ebs.volume",
265265
resource_id=v["volume_id"],
266266
region=region,
267-
estimated_monthly_cost_usd=None, # spec §9: flat rate invalid across volume types
267+
estimated_monthly_cost_usd=None, # spec 9: flat rate invalid across volume types
268268
title="Unattached EBS volume review candidate",
269269
summary=(
270270
f"EBS volume has been unattached for {age_days} days "

cleancloud/providers/aws/rules/ec2_sg_unused.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def find_unused_security_groups(
359359
"and the default-group exclusion did not match"
360360
),
361361
risk=RiskLevel.LOW,
362-
confidence=ConfidenceLevel.MEDIUM, # spec §7: MEDIUM mandatory; HIGH not recommended
362+
confidence=ConfidenceLevel.MEDIUM, # spec 7: MEDIUM mandatory; HIGH not recommended
363363
detected_at=now,
364364
evidence=evidence,
365365
details=details,

cleancloud/providers/aws/rules/ec2_stopped.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ def find_stopped_ec2_instances(
347347
now = datetime.now(timezone.utc)
348348
findings: List[Finding] = []
349349

350-
# Guard: lookup window must cover at least the threshold (spec §4).
350+
# Guard: lookup window must cover at least the threshold (spec 4).
351351
# A shorter window cannot prove the required stopped duration.
352352
if cloudtrail_lookup_days < stopped_age_threshold_days:
353353
return findings

cleancloud/providers/azure/rules/app_gateway_no_backends.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from cleancloud.core.risk import RiskLevel
4545

4646
# ---------------------------------------------------------------------------
47-
# Module constants (spec §17)
47+
# Module constants (spec 17)
4848
# ---------------------------------------------------------------------------
4949

5050
_EVALUATION_PATH = "app-gateway-no-backends"
@@ -125,11 +125,11 @@ def _normalize_pool(pool) -> Optional[dict]:
125125
raw_name = _get_str(pool, "name")
126126
pool_name = raw_name or _name_from_id(pool_id)
127127

128-
# backendAddresses — canonical target source (spec §2)
128+
# backendAddresses — canonical target source (spec 2)
129129
backend_addresses = _get_list(pool, "backend_addresses") or _get_list(pool, "backendAddresses")
130130
backend_addresses = [a for a in backend_addresses if a is not None]
131131

132-
# backendIPConfigurations — optional legacy/read-only field (spec §2)
132+
# backendIPConfigurations — optional legacy/read-only field (spec 2)
133133
legacy_cfgs = _get_list(pool, "backend_ip_configurations") or _get_list(
134134
pool, "backendIPConfigurations"
135135
)
@@ -421,7 +421,7 @@ def _traverse_url_path_map(
421421
pool_route_refs,
422422
pool_rule_ids,
423423
diags,
424-
ldp_keyword="LoadDistributionPolicy", # spec §4 canonical
424+
ldp_keyword="LoadDistributionPolicy", # spec 4 canonical
425425
)
426426

427427
# Path rules
@@ -627,7 +627,7 @@ def _traverse_gateway(
627627
or getattr(rule, "loadDistributionPolicy", None)
628628
)
629629

630-
# Redirect presence: spec §6 — presence of the field is sufficient; a malformed
630+
# Redirect presence: spec 6 — presence of the field is sufficient; a malformed
631631
# (non-null, non-resolvable) redirectConfiguration ref still counts as present.
632632
redirect_present = redirect_ref is not None
633633
if redirect_present and _norm_id(redirect_ref) is None:
@@ -728,7 +728,7 @@ def _traverse_gateway(
728728
now = datetime.now(timezone.utc)
729729

730730
# Build signals_not_checked: blind-spot strings + structured diagnostic dicts
731-
# (spec §5 diagnostics contract requires minimum structured shape)
731+
# (spec 5 diagnostics contract requires minimum structured shape)
732732
def _diag_key(diag) -> tuple:
733733
if isinstance(diag, dict):
734734
return (

0 commit comments

Comments
 (0)