Skip to content

Commit 1e9a8a7

Browse files
authored
feat(security): wire Dependabot alert posture into portfolio risk model (#27)
* feat(security): wire Dependabot alert posture into portfolio risk model

  Schema 0.4.0 -> 0.5.0: new SecurityFields (Dependabot / code-scanning / secret-scanning counts) on every PortfolioTruthProject.

  New risk factor active-high-severity-alerts — open high alerts add one factor toward the 3+ elevation threshold; an open critical alert force-elevates on its own, so a lone unpatched critical CVE cannot hide in a clean repo.

  Opt-in via --portfolio-truth-include-security, overlaying the latest output/ghas-alerts-<user>-*.json (mirrors the release_count overlay; the truth pipeline itself stays network-free / offline-testable).

  Weekly digest gains a ## Security Posture section distinguishing scanned-clean from unscanned repos.

  Fully inert unless fed: defaults keep the factor dormant and all existing risk tiers unchanged.

  27 new tests across risk math, GHAS mapping, opt-in, force-elevate, deferred short-circuit, digest states, and the CLI loader. 2140 pass.

* feat(security): join GHAS overlay by repo name, not just display name

  The security overlay is keyed by GitHub repo name, but local dir display names often differ ("Signal & Noise" vs "signal-noise"), so 40 repos with open alerts were silently missed.

  Extract `_select_security_entry`: match on the repo name from repo_full_name first, fall back to display_name.

  Live impact: overlay match rate 113 -> 153 of 161 local projects; e.g. Signal & Noise (9 high), Devil's Advocate (6), Interruption Resume Studio (3) now correctly join.

  4 join-precedence tests added.
1 parent 9bf021d commit 1e9a8a7

10 files changed

Lines changed: 659 additions & 11 deletions

src/cli.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,15 @@ def build_parser() -> argparse.ArgumentParser:
620620
"output/audit-report-<username>-*.json warehouse file (requires a prior audit run)"
621621
),
622622
)
623+
parser.add_argument(
624+
"--portfolio-truth-include-security",
625+
action="store_true",
626+
help=(
627+
"Overlay the security.* GHAS alert counts on each project from the latest "
628+
"output/ghas-alerts-<username>-*.json file, feeding the active-high-severity-alerts "
629+
"risk factor (requires a prior `audit report --ghas-alerts` run)"
630+
),
631+
)
623632
parser.add_argument(
624633
"--portfolio-context-recovery",
625634
action="store_true",
@@ -5187,6 +5196,52 @@ def _load_release_count_by_name(*, output_dir: Path, username: str) -> dict[str,
51875196
return result
51885197

51895198

5199+
def _load_security_alerts_by_name(*, output_dir: Path, username: str) -> dict[str, dict] | None:
    """Load per-repo GHAS alert counts from the latest output/ghas-alerts-<username>-*.json.

    "Latest" is the matching file with the newest modification time; ties are
    broken by file name so the choice does not depend on directory listing order.

    The file is expected to be a JSON object keyed by repo name, each value in the
    shape the security overlay consumes
    ({name: {"dependabot": {...}, "code_scanning": {...}, "secret_scanning": {...}}}),
    so no transformation is applied beyond dropping entries whose value is not an
    object.

    Args:
        output_dir: Directory searched (non-recursively) for GHAS reports.
        username: Account name embedded in the report file name.

    Returns:
        A ``{name: entry}`` mapping, or None (after logging a warning) when no
        report exists, the report cannot be read or parsed, or its top level is
        not a JSON object. On None the caller skips the security overlay.
    """
    import logging

    _log = logging.getLogger(__name__)

    def _recency(path: Path) -> tuple[float, str]:
        # A report can be removed between glob() and stat(); rank such a file as
        # oldest instead of letting the OSError abort the whole run.
        try:
            modified = path.stat().st_mtime
        except OSError:
            modified = float("-inf")
        return (modified, path.name)

    ghas_path = max(
        output_dir.glob(f"ghas-alerts-{username}-*.json"),
        key=_recency,
        default=None,
    )
    if ghas_path is None:
        _log.warning(
            "--portfolio-truth-include-security requires a prior `audit report --ghas-alerts` "
            "run; no ghas-alerts-%s-*.json found in %s — skipping security overlay",
            username,
            output_dir,
        )
        return None

    try:
        # JSON is UTF-8 by specification; do not depend on the platform default
        # encoding, which would mis-decode non-ASCII repo names on some systems.
        with ghas_path.open(encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception as exc:  # noqa: BLE001 - best-effort overlay, never fatal
        _log.warning(
            "--portfolio-truth-include-security: could not read %s: %s — skipping",
            ghas_path,
            exc,
        )
        return None

    if not isinstance(data, dict):
        _log.warning(
            "--portfolio-truth-include-security: %s is not a name-keyed object — skipping",
            ghas_path,
        )
        return None

    # Keep only well-formed (object-valued) entries; anything else is ignored.
    return {name: entry for name, entry in data.items() if isinstance(entry, dict)}
5243+
5244+
51905245
def _run_portfolio_truth_mode(args) -> None:
51915246
from src.portfolio_truth_publish import publish_portfolio_truth
51925247

@@ -5211,6 +5266,13 @@ def _run_portfolio_truth_mode(args) -> None:
52115266
username=args.username,
52125267
)
52135268

5269+
security_alerts_by_name: dict[str, dict] | None = None
5270+
if getattr(args, "portfolio_truth_include_security", False):
5271+
security_alerts_by_name = _load_security_alerts_by_name(
5272+
output_dir=output_dir,
5273+
username=args.username,
5274+
)
5275+
52145276
result = publish_portfolio_truth(
52155277
workspace_root=workspace_root,
52165278
output_dir=output_dir,
@@ -5220,6 +5282,7 @@ def _run_portfolio_truth_mode(args) -> None:
52205282
legacy_registry_path=legacy_registry_path,
52215283
include_notion=True,
52225284
release_count_by_name=release_count_by_name,
5285+
security_alerts_by_name=security_alerts_by_name,
52235286
)
52245287
print_info(f"Portfolio truth snapshot: {result.latest_path}")
52255288
print_info(f"Portfolio truth history snapshot: {result.snapshot_path}")
@@ -5381,7 +5444,9 @@ def _run_context_triage_mode(args) -> None:
53815444
project_key = identity.get("project_key") or ""
53825445
name = identity.get("display_name") or project.get("name", "")
53835446
repo_keys.extend(key for key in (project_key, name) if key)
5384-
catalog_scores = validate_catalog(catalog_path, sorted(set(repo_keys))) if catalog_path.exists() else {}
5447+
catalog_scores = (
5448+
validate_catalog(catalog_path, sorted(set(repo_keys))) if catalog_path.exists() else {}
5449+
)
53855450

53865451
enriched: list[dict] = []
53875452
for project in projects:

src/portfolio_risk.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
"missing-doctor-standard": "doctor standard not declared",
2828
"no-run-instructions": "run instructions missing",
2929
"undocumented-risks": "known risks not documented",
30+
"active-high-severity-alerts": "open high/critical security alerts",
3031
}
3132

3233
_DEFERRED_ARCHIVED = {
@@ -36,6 +37,7 @@
3637
"doctor_gap": False,
3738
"context_risk": False,
3839
"path_risk": False,
40+
"security_risk": False,
3941
}
4042

4143
_DEFERRED_STALE = {
@@ -45,6 +47,7 @@
4547
"doctor_gap": False,
4648
"context_risk": False,
4749
"path_risk": False,
50+
"security_risk": False,
4851
}
4952

5053

@@ -61,6 +64,8 @@ def build_risk_entry(
6164
doctor_standard: str,
6265
known_risks_present: bool,
6366
run_instructions_present: bool,
67+
security_high_alerts: int = 0,
68+
security_critical_alerts: int = 0,
6469
) -> dict[str, Any]:
6570
# Short-circuit deferred: archived or archive-path
6671
if registry_status == "archived" or operating_path == "archive":
@@ -91,9 +96,20 @@ def build_risk_entry(
9196
if criticality in {"high", "critical"} and not known_risks_present:
9297
factors.append("undocumented-risks")
9398

99+
# Active repo carrying open high- or critical-severity Dependabot alerts.
100+
# High alerts contribute one normal factor toward the 3+ elevation threshold;
101+
# an open critical alert force-elevates on its own (see is_elevated below) — a
102+
# lone unpatched critical CVE cannot hide in an otherwise-clean repo.
103+
active = activity_status in ACTIVE_STATUSES
104+
if active and (security_high_alerts > 0 or security_critical_alerts > 0):
105+
factors.append("active-high-severity-alerts")
106+
94107
# Derive tier
95-
is_elevated = len(factors) >= 3 or (
96-
"weak-context-active" in factors and "investigate-override" in factors
108+
security_forces_elevated = active and security_critical_alerts > 0
109+
is_elevated = (
110+
len(factors) >= 3
111+
or ("weak-context-active" in factors and "investigate-override" in factors)
112+
or security_forces_elevated
97113
)
98114
if is_elevated:
99115
risk_tier = "elevated"
@@ -106,6 +122,7 @@ def build_risk_entry(
106122
doctor_gap = "missing-doctor-standard" in factors
107123
context_risk = "weak-context-active" in factors
108124
path_risk = "investigate-override" in factors or "missing-operating-path" in factors
125+
security_risk = "active-high-severity-alerts" in factors
109126

110127
# Build summary
111128
if not factors:
@@ -121,6 +138,7 @@ def build_risk_entry(
121138
"doctor_gap": doctor_gap,
122139
"context_risk": context_risk,
123140
"path_risk": path_risk,
141+
"security_risk": security_risk,
124142
}
125143

126144

src/portfolio_truth_publish.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def publish_portfolio_truth(
3636
legacy_registry_path: Path | None = None,
3737
include_notion: bool = True,
3838
release_count_by_name: dict[str, int] | None = None,
39+
security_alerts_by_name: dict[str, dict] | None = None,
3940
) -> PortfolioTruthPublishResult:
4041
validate_publish_targets(
4142
workspace_root=workspace_root,
@@ -49,6 +50,7 @@ def publish_portfolio_truth(
4950
legacy_registry_path=legacy_registry_path,
5051
include_notion=include_notion,
5152
release_count_by_name=release_count_by_name,
53+
security_alerts_by_name=security_alerts_by_name,
5254
)
5355
validate_truth_snapshot(build_result.snapshot)
5456

src/portfolio_truth_reconcile.py

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
PortfolioTruthProject,
2929
PortfolioTruthSnapshot,
3030
RiskFields,
31+
SecurityFields,
3132
)
3233
from src.registry_parser import _normalize
3334

@@ -180,6 +181,7 @@ def build_portfolio_truth_snapshot(
180181
include_notion: bool = True,
181182
now: datetime | None = None,
182183
release_count_by_name: dict[str, int] | None = None,
184+
security_alerts_by_name: dict[str, dict] | None = None,
183185
) -> PortfolioTruthBuildResult:
184186
now = now or datetime.now(timezone.utc)
185187
catalog_data = load_portfolio_catalog(catalog_path)
@@ -199,6 +201,7 @@ def build_portfolio_truth_snapshot(
199201
notion_context=notion_context,
200202
now=now,
201203
release_count_by_name=release_count_by_name,
204+
security_alerts_by_name=security_alerts_by_name,
202205
)
203206
for raw_project in workspace_projects
204207
]
@@ -259,21 +262,55 @@ def _unresolved_duplicate_display_names(projects: list[PortfolioTruthProject]) -
259262
return sorted(
260263
name
261264
for name, members in grouped.items()
262-
if len(members) > 1
263-
and any(not _has_path_catalog_contract(project) for project in members)
265+
if len(members) > 1 and any(not _has_path_catalog_contract(project) for project in members)
264266
)
265267

266268

267269
def _has_path_catalog_contract(project: PortfolioTruthProject) -> bool:
    """Return True when any provenance record pins *project* to the catalog by path.

    A record qualifies when its ``source`` is ``"catalog_repo"`` and its ``detail``
    equals the project's own ``identity.path``. The keys of ``project.provenance``
    are not inspected; only the record values matter.
    """
    return any(
        record.get("source") == "catalog_repo" and record.get("detail") == project.identity.path
        for record in project.provenance.values()
    )
275274

276275

276+
def _build_security_fields(ghas_entry: dict[str, Any] | None) -> SecurityFields:
    """Map a per-repo GHAS alert entry (from output/ghas-alerts-<username>-*.json)
    into SecurityFields.

    A missing, empty, or non-dict entry yields ``SecurityFields()`` with its
    default values, which keeps the security overlay strictly opt-in (no entry →
    no security signal).

    The entry is read defensively because it comes from a file on disk:

    * a section ("dependabot", "code_scanning", "secret_scanning") that is absent
      or not an object is treated as empty rather than raising;
    * a count is accepted only when it is a non-negative integer — booleans are
      rejected explicitly because ``bool`` is a subclass of ``int`` and a JSON
      ``true`` would otherwise be counted as one alert; anything else becomes 0.

    ``alerts_available`` reflects only the Dependabot section's "available" flag.
    """
    if not ghas_entry or not isinstance(ghas_entry, dict):
        return SecurityFields()

    def _section(name: str) -> dict[str, Any]:
        # Only the top-level entry is known to be a dict; each section still
        # needs its own shape check before .get() can be called on it.
        value = ghas_entry.get(name)
        return value if isinstance(value, dict) else {}

    def _count(source: dict[str, Any], key: str) -> int:
        value = source.get(key)
        if isinstance(value, bool) or not isinstance(value, int):
            return 0
        return value if value >= 0 else 0

    dependabot = _section("dependabot")
    code_scanning = _section("code_scanning")
    secret_scanning = _section("secret_scanning")

    return SecurityFields(
        alerts_available=bool(dependabot.get("available", False)),
        dependabot_critical=_count(dependabot, "critical"),
        dependabot_high=_count(dependabot, "high"),
        dependabot_medium=_count(dependabot, "medium"),
        dependabot_low=_count(dependabot, "low"),
        code_scanning_critical=_count(code_scanning, "critical"),
        code_scanning_high=_count(code_scanning, "high"),
        secret_scanning_open=_count(secret_scanning, "open"),
    )
301+
302+
303+
def _select_security_entry(
    lookup: dict[str, dict], repo_full_name: str | None, display_name: str
) -> dict | None:
    """Join a project to its GHAS overlay entry.

    The overlay is keyed by GitHub repo name, but the local dir display_name often
    differs (e.g. "Signal & Noise" vs "signal-noise"), so match on the repo name
    taken from repo_full_name (the part after the last "/") first and fall back to
    display_name only when repo_full_name is absent or unmatched.

    Matching is by key presence, not by truthiness of the entry: a repo whose
    overlay entry is an empty dict is still a match and is returned as-is, instead
    of silently picking up a different entry that happens to share the display
    name. An empty repo name never matches, so a project without repo_full_name
    cannot join to a "" key.

    Returns:
        The matched entry, or None when neither key is present in *lookup*.
    """
    # rstrip("/") tolerates a trailing slash, which would otherwise leave an
    # empty last segment after the split.
    repo_name = (repo_full_name or "").rstrip("/").rsplit("/", 1)[-1]
    if repo_name:
        entry = lookup.get(repo_name)
        if entry is not None:
            return entry
    return lookup.get(display_name)
312+
313+
277314
def _build_truth_project(
278315
raw_project: dict[str, Any],
279316
*,
@@ -282,6 +319,7 @@ def _build_truth_project(
282319
notion_context: dict[str, dict[str, str]],
283320
now: datetime,
284321
release_count_by_name: dict[str, int] | None = None,
322+
security_alerts_by_name: dict[str, dict] | None = None,
285323
) -> PortfolioTruthProject:
286324
relative_path = raw_project["path"]
287325
group_entry = group_entry_for_path(relative_path, catalog_data)
@@ -393,6 +431,16 @@ def _build_truth_project(
393431
"detail": "derived",
394432
}
395433

434+
security_entry = _select_security_entry(
435+
security_alerts_by_name or {},
436+
raw_project.get("repo_full_name"),
437+
raw_project["name"],
438+
)
439+
security = _build_security_fields(security_entry)
440+
441+
# Only Dependabot high/critical counts drive the risk tier today. Code-scanning
442+
# and secret-scanning counts are captured in SecurityFields for visibility but do
443+
# not yet feed the active-high-severity-alerts factor (Dependabot-only scope).
396444
risk_entry = build_risk_entry(
397445
display_name=raw_project["name"],
398446
operating_path=path_entry.get("operating_path", ""),
@@ -405,6 +453,8 @@ def _build_truth_project(
405453
doctor_standard=declared_values["doctor_standard"],
406454
known_risks_present=bool(raw_project["known_risks_present"]),
407455
run_instructions_present=bool(raw_project["run_instructions_present"]),
456+
security_high_alerts=security.dependabot_high,
457+
security_critical_alerts=security.dependabot_critical,
408458
)
409459

410460
declared = DeclaredFields(
@@ -516,6 +566,7 @@ def _build_truth_project(
516566
doctor_gap=risk_entry["doctor_gap"],
517567
context_risk=risk_entry["context_risk"],
518568
path_risk=risk_entry["path_risk"],
569+
security_risk=risk_entry["security_risk"],
519570
)
520571
provenance["risk.risk_tier"] = {"source": "derived", "detail": risk_entry["risk_tier"]}
521572
provenance["risk.doctor_gap"] = {
@@ -527,6 +578,7 @@ def _build_truth_project(
527578
declared=declared,
528579
derived=derived,
529580
risk=risk,
581+
security=security,
530582
advisory=advisory,
531583
provenance=provenance,
532584
warnings=warnings,

src/portfolio_truth_types.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from datetime import datetime
66
from typing import Any
77

8-
SCHEMA_VERSION = "0.4.0"
8+
SCHEMA_VERSION = "0.5.0"
99

1010
VALID_CONTEXT_QUALITY = {"full", "standard", "minimum-viable", "boilerplate", "none"}
1111
VALID_ACTIVITY_STATUS = {"active", "recent", "stale", "archived"}
@@ -124,6 +124,32 @@ class RiskFields:
124124
doctor_gap: bool = False
125125
context_risk: bool = False
126126
path_risk: bool = False
127+
security_risk: bool = False
128+
129+
def to_dict(self) -> dict[str, Any]:
130+
return dataclasses.asdict(self)
131+
132+
133+
@dataclass(frozen=True)
class SecurityFields:
    """Open GitHub Advanced Security alert counts for one project.

    The values are overlaid opt-in from the latest
    output/ghas-alerts-<username>-*.json. ``alerts_available`` separates two
    states that both show zero counts: False means the repo was not scanned
    (no token / GHAS disabled / not fetched), True with all-zero counts means a
    clean scan — so consumers don't mislabel an unscanned repo as secure.
    """

    # Whether alert data was available for this repo at all.
    alerts_available: bool = False

    # Dependabot alerts by severity.
    dependabot_critical: int = 0
    dependabot_high: int = 0
    dependabot_medium: int = 0
    dependabot_low: int = 0

    # Code-scanning alerts (only the top two severities are tracked).
    code_scanning_critical: int = 0
    code_scanning_high: int = 0

    # Open secret-scanning alerts.
    secret_scanning_open: int = 0

    @property
    def open_high_critical(self) -> int:
        """Dependabot critical + high — the security-risk-factor trigger surface."""
        return self.dependabot_critical + self.dependabot_high

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict (the property is not included)."""
        return dataclasses.asdict(self)
@@ -135,6 +161,7 @@ class PortfolioTruthProject:
135161
declared: DeclaredFields
136162
derived: DerivedFields
137163
risk: RiskFields = field(default_factory=RiskFields)
164+
security: SecurityFields = field(default_factory=SecurityFields)
138165
advisory: AdvisoryFields = field(default_factory=AdvisoryFields)
139166
provenance: dict[str, dict[str, str]] = field(default_factory=dict)
140167
warnings: list[str] = field(default_factory=list)
@@ -145,6 +172,7 @@ def to_dict(self) -> dict[str, Any]:
145172
"declared": self.declared.to_dict(),
146173
"derived": self.derived.to_dict(),
147174
"risk": self.risk.to_dict(),
175+
"security": self.security.to_dict(),
148176
"advisory": self.advisory.to_dict(),
149177
"provenance": self.provenance,
150178
"warnings": list(self.warnings),

0 commit comments

Comments
 (0)