Skip to content

Commit e9f32c7

Browse files
hummbl-dev and Claude (agent)
authored
feat: add certify subcommand — full certification workflow (#55)
Combines code quality (50%), governance maturity (30%), and dependency health (20%) into a certification decision: CERTIFIED, PROVISIONAL, or FAILED. Records result in VERUM audit trail. Completes Phase 4. Co-authored-by: Claude (agent) <claude@agents.hummbl.io> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a32593d commit e9f32c7

3 files changed

Lines changed: 247 additions & 0 deletions

File tree

src/arbiter/__main__.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,52 @@ def cmd_deps(args: argparse.Namespace) -> None:
389389
_print_footer()
390390

391391

392+
def cmd_certify(args: argparse.Namespace) -> None:
    """Run the full certification assessment for a repository.

    Combines code quality, governance maturity, and dependency health into a
    single CERTIFIED / PROVISIONAL / FAILED decision, prints it (text or
    JSON), optionally records it in the VERUM audit trail, and exits
    non-zero when ``--fail-on-failed`` is set and the decision is FAILED.
    """
    # Deferred imports keep CLI startup fast for unrelated subcommands,
    # matching the pattern used by the other cmd_* handlers in this file.
    from arbiter.certify import certify
    from arbiter.governance_score import score_governance
    from arbiter.dep_score import find_and_score_deps
    from arbiter.audit_trail import AuditTrail

    repo_path = Path(args.repo).resolve()
    exclude_paths = _parse_exclude_paths(args.exclude)

    # Progress goes to stderr so stdout stays clean for --json consumers.
    print(f"Certifying {repo_path.name}...", file=sys.stderr)

    # Dimension 1: code quality from static-analysis findings.
    analyzers = _get_analyzers()
    findings = _run_analysis(repo_path, analyzers, exclude_paths=exclude_paths)
    findings = _apply_noise_filter(findings, getattr(args, "noise_threshold", None))
    loc = count_loc(repo_path)
    code_score = score_findings(findings, loc)
    # Dimensions 2 and 3: governance maturity and dependency health.
    gov_report = score_governance(repo_path)
    dep_report = find_and_score_deps(repo_path)

    result = certify(code_score, gov_report, dep_report, findings)

    if args.json:
        print(json.dumps({
            "decision": result.decision,
            "overall": result.overall,
            "code_score": result.code_score,
            "governance_score": result.governance_score,
            "dep_score": result.dep_score,
            "findings": result.findings_count,
            "reasons": result.reasons,
        }, indent=2))
    else:
        print(result.summary())
        _print_footer()

    if not args.no_audit:
        trail = AuditTrail(Path(args.trail))
        # The trail's "grade" slot carries the certification decision here.
        trail.append(
            repo=repo_path.name, score=result.overall, grade=result.decision,
            findings=result.findings_count, loc=loc,
            dimensions={"code": result.code_score, "governance": result.governance_score, "dependencies": result.dep_score},
        )
        print(f"Recorded in audit trail: {args.trail}", file=sys.stderr)

    # BUG FIX: --fail-on-failed was registered by the parser but never acted
    # on. Exit non-zero so CI pipelines can gate on a FAILED certification.
    # (getattr guards against callers constructing a Namespace by hand.)
    if getattr(args, "fail_on_failed", False) and result.decision == "FAILED":
        sys.exit(1)
392438
def cmd_audit_trail(args: argparse.Namespace) -> None:
393439
"""Manage the VERUM-aligned audit trail."""
394440
from arbiter.audit_trail import AuditTrail, verify_chain
@@ -2018,6 +2064,16 @@ def main() -> None:
20182064
p_learn.add_argument("--ledger", default="contribution_learnings.jsonl", help="Path to JSONL ledger")
20192065
p_learn.add_argument("--output", "-o", help="Write markdown to file")
20202066

2067+
# certify
2068+
p_certify = subparsers.add_parser("certify", help="Full certification assessment")
2069+
p_certify.add_argument("repo", help="Path to repository")
2070+
p_certify.add_argument("--json", action="store_true", help="JSON output")
2071+
p_certify.add_argument("--exclude", type=str, default="", help="Comma-separated exclude paths")
2072+
p_certify.add_argument("--noise-threshold", type=int, default=None, help="Noise threshold")
2073+
p_certify.add_argument("--trail", default="arbiter_audit.jsonl", help="Audit trail path")
2074+
p_certify.add_argument("--no-audit", action="store_true", help="Skip audit trail recording")
2075+
p_certify.add_argument("--fail-on-failed", action="store_true", help="Exit non-zero if FAILED")
2076+
20212077
# audit-trail
20222078
p_audit = subparsers.add_parser("audit-trail",
20232079
help="VERUM-aligned append-only audit trail for scores")
@@ -2271,6 +2327,7 @@ def main() -> None:
22712327
"contributions": cmd_contributions,
22722328
"watch": cmd_watch,
22732329
"languages": cmd_languages,
2330+
"certify": cmd_certify,
22742331
"audit-trail": cmd_audit_trail,
22752332
"deps": cmd_deps,
22762333
"governance": cmd_governance,

src/arbiter/certify.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""Certify — full certification workflow combining all Arbiter dimensions.
2+
3+
Produces a certification decision: CERTIFIED, PROVISIONAL, or FAILED
4+
based on code quality, governance maturity, dependency health, and
5+
compliance mapping. Records the decision in the VERUM audit trail.
6+
7+
Stdlib only.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from dataclasses import dataclass, field
13+
from pathlib import Path
14+
15+
from arbiter.analyzers.base import Finding
16+
from arbiter.scoring import RepoScore
17+
18+
19+
@dataclass
class CertificationResult:
    """Result of a full certification assessment."""

    decision: str  # CERTIFIED, PROVISIONAL, FAILED
    code_score: float
    governance_score: float
    dep_score: float
    overall: float  # weighted blend: code 50%, governance 30%, deps 20%
    findings_count: int
    governance_checks_passed: int
    governance_checks_total: int
    reasons: list[str] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True for any non-failing decision (CERTIFIED or PROVISIONAL)."""
        return self.decision in ("CERTIFIED", "PROVISIONAL")

    def summary(self) -> str:
        """Return a human-readable multi-line report of the decision."""
        lines = [
            f"Certification: {self.decision}",
            "=" * 50,
            f" Code Quality: {self.code_score:5.1f} / 100",
            f" Governance: {self.governance_score:5.1f} / 100 ({self.governance_checks_passed}/{self.governance_checks_total} checks)",
            f" Dependencies: {self.dep_score:5.1f} / 100",
            f" Overall: {self.overall:5.1f} / 100",
            f" Findings: {self.findings_count}",
            "",
        ]
        if self.reasons:
            lines.append("Reasons:")
            for r in self.reasons:
                lines.append(f" - {r}")
        return "\n".join(lines)


# Certification thresholds: per-tier floors for the weighted overall score
# and for each individual dimension. A repository must clear *every* floor
# in a tier to earn that decision; otherwise it falls through to the next
# tier, and ultimately to FAILED.
CERT_THRESHOLDS = {
    "certified": {"overall": 80, "code": 75, "governance": 60, "deps": 70},
    "provisional": {"overall": 60, "code": 50, "governance": 40, "deps": 50},
}


def certify(
    code_score: RepoScore,
    governance_report,  # GovernanceReport (duck-typed: .score, .checks)
    dep_report,  # DepReport (duck-typed: .score)
    findings: list[Finding],
) -> CertificationResult:
    """Run full certification assessment.

    Weights: code quality 50%, governance 30%, dependencies 20%.

    Returns a CertificationResult whose ``reasons`` list explains every
    threshold the repository missed for the tier above its decision.
    """
    # An unscorable repo (e.g. nothing analyzable) contributes 0 to the blend.
    code = code_score.overall if code_score.is_scorable else 0
    gov = governance_report.score
    deps = dep_report.score

    overall = code * 0.50 + gov * 0.30 + deps * 0.20
    overall = round(overall, 1)

    reasons: list[str] = []

    cert = CERT_THRESHOLDS["certified"]
    prov = CERT_THRESHOLDS["provisional"]

    if (overall >= cert["overall"] and code >= cert["code"]
            and gov >= cert["governance"] and deps >= cert["deps"]):
        decision = "CERTIFIED"
    elif (overall >= prov["overall"] and code >= prov["code"]
            and gov >= prov["governance"] and deps >= prov["deps"]):
        decision = "PROVISIONAL"
        # Explain each certified-tier floor that was missed.
        if code < cert["code"]:
            reasons.append(f"Code score {code:.1f} below certified threshold ({cert['code']})")
        if gov < cert["governance"]:
            reasons.append(f"Governance score {gov:.1f} below certified threshold ({cert['governance']})")
        if deps < cert["deps"]:
            reasons.append(f"Dependency score {deps:.1f} below certified threshold ({cert['deps']})")
        # BUG FIX: previously a PROVISIONAL result could carry no reasons at
        # all when every dimension met its certified floor but the weighted
        # overall still fell short (e.g. 75/60/70 -> 69.5). Report that case
        # explicitly, mirroring the FAILED branch below.
        if overall < cert["overall"]:
            reasons.append(f"Overall score {overall:.1f} below certified threshold ({cert['overall']})")
    else:
        decision = "FAILED"
        if code < prov["code"]:
            reasons.append(f"Code score {code:.1f} below minimum threshold ({prov['code']})")
        if gov < prov["governance"]:
            reasons.append(f"Governance score {gov:.1f} below minimum threshold ({prov['governance']})")
        if deps < prov["deps"]:
            reasons.append(f"Dependency score {deps:.1f} below minimum threshold ({prov['deps']})")
        if overall < prov["overall"]:
            reasons.append(f"Overall score {overall:.1f} below minimum threshold ({prov['overall']})")

    # Governance check tally is reported for context, not used in scoring.
    gov_passed = sum(1 for c in governance_report.checks if c.present)
    gov_total = len(governance_report.checks)

    return CertificationResult(
        decision=decision,
        code_score=code,
        governance_score=gov,
        dep_score=deps,
        overall=overall,
        findings_count=len(findings),
        governance_checks_passed=gov_passed,
        governance_checks_total=gov_total,
        reasons=reasons,
    )

tests/test_certify.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
"""Tests for certify — full certification workflow."""
2+
3+
from arbiter.analyzers.base import Finding
4+
from arbiter.scoring import RepoScore
5+
from arbiter.certify import certify, CertificationResult
6+
7+
8+
class _FakeGovReport:
9+
def __init__(self, score, checks_passed=8, checks_total=10):
10+
self.score = score
11+
self.checks = [type('C', (), {'present': True})()] * checks_passed + \
12+
[type('C', (), {'present': False})()] * (checks_total - checks_passed)
13+
14+
15+
class _FakeDepReport:
16+
def __init__(self, score):
17+
self.score = score
18+
19+
20+
def _score(overall=90, lint=92, sec=88, cx=90):
    """Build a scorable RepoScore fixture with adjustable dimension values."""
    return RepoScore(
        overall=overall,
        lint_score=lint,
        security_score=sec,
        complexity_score=cx,
        total_findings=10,
    )
23+
24+
25+
class TestCertify:
    """Behavioral tests for the certification decision logic."""

    def test_certified(self):
        # High marks on every dimension clear all certified-tier floors.
        res = certify(_score(90), _FakeGovReport(80), _FakeDepReport(85), [])
        assert res.decision == "CERTIFIED"
        assert res.passed

    def test_provisional(self):
        # Mid-range scores clear provisional floors but not certified ones.
        res = certify(_score(70), _FakeGovReport(55), _FakeDepReport(65), [])
        assert res.decision == "PROVISIONAL"
        assert res.passed

    def test_failed(self):
        res = certify(_score(30), _FakeGovReport(20), _FakeDepReport(30), [])
        assert res.decision == "FAILED"
        assert not res.passed

    def test_failed_has_reasons(self):
        res = certify(_score(30), _FakeGovReport(20), _FakeDepReport(30), [])
        assert len(res.reasons) > 0

    def test_overall_weighted(self):
        # 50/30/20 weighting of the three dimensions.
        res = certify(_score(80), _FakeGovReport(60), _FakeDepReport(70), [])
        expected = 80 * 0.5 + 60 * 0.3 + 70 * 0.2  # 72
        assert abs(res.overall - expected) < 0.2

    def test_summary_includes_decision(self):
        res = certify(_score(90), _FakeGovReport(80), _FakeDepReport(85), [])
        assert "CERTIFIED" in res.summary()

    def test_provisional_gives_reasons(self):
        res = certify(_score(70), _FakeGovReport(50), _FakeDepReport(60), [])
        assert res.decision == "PROVISIONAL"
        assert any("below certified" in r for r in res.reasons)

    def test_governance_checks_counted(self):
        res = certify(_score(90), _FakeGovReport(80, 7, 10), _FakeDepReport(85), [])
        assert res.governance_checks_passed == 7
        assert res.governance_checks_total == 10

    def test_non_scorable_code(self):
        # An unscorable repo contributes 0 for code, dragging overall to FAILED.
        unscorable = RepoScore(overall=0, lint_score=0, security_score=0,
                               complexity_score=0, total_findings=0, is_scorable=False)
        res = certify(unscorable, _FakeGovReport(80), _FakeDepReport(85), [])
        assert res.code_score == 0
        assert res.decision == "FAILED"

0 commit comments

Comments
 (0)