Skip to content

Commit e9f32c7

Browse files
hummbl-dev and Claude (agent)
authored
feat: add certify subcommand — full certification workflow (#55)
Combines code quality (50%), governance maturity (30%), and dependency health (20%) into a certification decision: CERTIFIED, PROVISIONAL, or FAILED. Records result in VERUM audit trail. Completes Phase 4. Co-authored-by: Claude (agent) <claude@agents.hummbl.io> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a32593d commit e9f32c7

3 files changed

Lines changed: 247 additions & 0 deletions

File tree

src/arbiter/__main__.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,52 @@ def cmd_deps(args: argparse.Namespace) -> None:
389389
_print_footer()
390390

391391

392+
def cmd_certify(args: argparse.Namespace) -> None:
    """Run the full certification assessment for a repository.

    Combines code quality, governance maturity, and dependency health into a
    single CERTIFIED / PROVISIONAL / FAILED decision, prints it (text or
    JSON), optionally records it in the VERUM audit trail, and exits
    non-zero when ``--fail-on-failed`` is set and the decision is FAILED.
    """
    # Deferred imports keep CLI startup fast for unrelated subcommands,
    # matching the pattern used by the other cmd_* handlers in this file.
    from arbiter.certify import certify
    from arbiter.governance_score import score_governance
    from arbiter.dep_score import find_and_score_deps
    from arbiter.audit_trail import AuditTrail

    repo_path = Path(args.repo).resolve()
    exclude_paths = _parse_exclude_paths(args.exclude)

    # Progress goes to stderr so stdout stays clean for --json consumers.
    print(f"Certifying {repo_path.name}...", file=sys.stderr)

    # Dimension 1: code quality from static-analysis findings.
    analyzers = _get_analyzers()
    findings = _run_analysis(repo_path, analyzers, exclude_paths=exclude_paths)
    findings = _apply_noise_filter(findings, getattr(args, "noise_threshold", None))
    loc = count_loc(repo_path)
    code_score = score_findings(findings, loc)
    # Dimensions 2 and 3: governance maturity and dependency health.
    gov_report = score_governance(repo_path)
    dep_report = find_and_score_deps(repo_path)

    result = certify(code_score, gov_report, dep_report, findings)

    if args.json:
        print(json.dumps({
            "decision": result.decision,
            "overall": result.overall,
            "code_score": result.code_score,
            "governance_score": result.governance_score,
            "dep_score": result.dep_score,
            "findings": result.findings_count,
            "reasons": result.reasons,
        }, indent=2))
    else:
        print(result.summary())
        _print_footer()

    if not args.no_audit:
        trail = AuditTrail(Path(args.trail))
        # The trail's "grade" slot carries the certification decision here.
        trail.append(
            repo=repo_path.name, score=result.overall, grade=result.decision,
            findings=result.findings_count, loc=loc,
            dimensions={"code": result.code_score, "governance": result.governance_score, "dependencies": result.dep_score},
        )
        print(f"Recorded in audit trail: {args.trail}", file=sys.stderr)

    # BUG FIX: --fail-on-failed was registered by the parser but never acted
    # on. Exit non-zero so CI pipelines can gate on a FAILED certification.
    # (getattr guards against callers constructing a Namespace by hand.)
    if getattr(args, "fail_on_failed", False) and result.decision == "FAILED":
        sys.exit(1)
392438
def cmd_audit_trail(args: argparse.Namespace) -> None:
393439
"""Manage the VERUM-aligned audit trail."""
394440
from arbiter.audit_trail import AuditTrail, verify_chain
@@ -2018,6 +2064,16 @@ def main() -> None:
20182064
p_learn.add_argument("--ledger", default="contribution_learnings.jsonl", help="Path to JSONL ledger")
20192065
p_learn.add_argument("--output", "-o", help="Write markdown to file")
20202066

2067+
# certify
2068+
p_certify = subparsers.add_parser("certify", help="Full certification assessment")
2069+
p_certify.add_argument("repo", help="Path to repository")
2070+
p_certify.add_argument("--json", action="store_true", help="JSON output")
2071+
p_certify.add_argument("--exclude", type=str, default="", help="Comma-separated exclude paths")
2072+
p_certify.add_argument("--noise-threshold", type=int, default=None, help="Noise threshold")
2073+
p_certify.add_argument("--trail", default="arbiter_audit.jsonl", help="Audit trail path")
2074+
p_certify.add_argument("--no-audit", action="store_true", help="Skip audit trail recording")
2075+
p_certify.add_argument("--fail-on-failed", action="store_true", help="Exit non-zero if FAILED")
2076+
20212077
# audit-trail
20222078
p_audit = subparsers.add_parser("audit-trail",
20232079
help="VERUM-aligned append-only audit trail for scores")
@@ -2271,6 +2327,7 @@ def main() -> None:
22712327
"contributions": cmd_contributions,
22722328
"watch": cmd_watch,
22732329
"languages": cmd_languages,
2330+
"certify": cmd_certify,
22742331
"audit-trail": cmd_audit_trail,
22752332
"deps": cmd_deps,
22762333
"governance": cmd_governance,

src/arbiter/certify.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""Certify — full certification workflow combining all Arbiter dimensions.
2+
3+
Produces a certification decision: CERTIFIED, PROVISIONAL, or FAILED
4+
based on code quality, governance maturity, dependency health, and
5+
compliance mapping. Records the decision in the VERUM audit trail.
6+
7+
Stdlib only.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from dataclasses import dataclass, field
13+
from pathlib import Path
14+
15+
from arbiter.analyzers.base import Finding
16+
from arbiter.scoring import RepoScore
17+
18+
19+
@dataclass
class CertificationResult:
    """Result of a full certification assessment."""

    decision: str  # CERTIFIED, PROVISIONAL, FAILED
    code_score: float
    governance_score: float
    dep_score: float
    overall: float  # weighted blend: code 50%, governance 30%, deps 20%
    findings_count: int
    governance_checks_passed: int
    governance_checks_total: int
    reasons: list[str] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True for any non-failing decision (CERTIFIED or PROVISIONAL)."""
        return self.decision in ("CERTIFIED", "PROVISIONAL")

    def summary(self) -> str:
        """Return a human-readable multi-line report of the decision."""
        lines = [
            f"Certification: {self.decision}",
            "=" * 50,
            f" Code Quality: {self.code_score:5.1f} / 100",
            f" Governance: {self.governance_score:5.1f} / 100 ({self.governance_checks_passed}/{self.governance_checks_total} checks)",
            f" Dependencies: {self.dep_score:5.1f} / 100",
            f" Overall: {self.overall:5.1f} / 100",
            f" Findings: {self.findings_count}",
            "",
        ]
        if self.reasons:
            lines.append("Reasons:")
            for r in self.reasons:
                lines.append(f" - {r}")
        return "\n".join(lines)


# Certification thresholds: per-tier floors for the weighted overall score
# and for each individual dimension. A repository must clear *every* floor
# in a tier to earn that decision; otherwise it falls through to the next
# tier, and ultimately to FAILED.
CERT_THRESHOLDS = {
    "certified": {"overall": 80, "code": 75, "governance": 60, "deps": 70},
    "provisional": {"overall": 60, "code": 50, "governance": 40, "deps": 50},
}


def certify(
    code_score: RepoScore,
    governance_report,  # GovernanceReport (duck-typed: .score, .checks)
    dep_report,  # DepReport (duck-typed: .score)
    findings: list[Finding],
) -> CertificationResult:
    """Run full certification assessment.

    Weights: code quality 50%, governance 30%, dependencies 20%.

    Returns a CertificationResult whose ``reasons`` list explains every
    threshold the repository missed for the tier above its decision.
    """
    # An unscorable repo (e.g. nothing analyzable) contributes 0 to the blend.
    code = code_score.overall if code_score.is_scorable else 0
    gov = governance_report.score
    deps = dep_report.score

    overall = code * 0.50 + gov * 0.30 + deps * 0.20
    overall = round(overall, 1)

    reasons: list[str] = []

    cert = CERT_THRESHOLDS["certified"]
    prov = CERT_THRESHOLDS["provisional"]

    if (overall >= cert["overall"] and code >= cert["code"]
            and gov >= cert["governance"] and deps >= cert["deps"]):
        decision = "CERTIFIED"
    elif (overall >= prov["overall"] and code >= prov["code"]
            and gov >= prov["governance"] and deps >= prov["deps"]):
        decision = "PROVISIONAL"
        # Explain each certified-tier floor that was missed.
        if code < cert["code"]:
            reasons.append(f"Code score {code:.1f} below certified threshold ({cert['code']})")
        if gov < cert["governance"]:
            reasons.append(f"Governance score {gov:.1f} below certified threshold ({cert['governance']})")
        if deps < cert["deps"]:
            reasons.append(f"Dependency score {deps:.1f} below certified threshold ({cert['deps']})")
        # BUG FIX: previously a PROVISIONAL result could carry no reasons at
        # all when every dimension met its certified floor but the weighted
        # overall still fell short (e.g. 75/60/70 -> 69.5). Report that case
        # explicitly, mirroring the FAILED branch below.
        if overall < cert["overall"]:
            reasons.append(f"Overall score {overall:.1f} below certified threshold ({cert['overall']})")
    else:
        decision = "FAILED"
        if code < prov["code"]:
            reasons.append(f"Code score {code:.1f} below minimum threshold ({prov['code']})")
        if gov < prov["governance"]:
            reasons.append(f"Governance score {gov:.1f} below minimum threshold ({prov['governance']})")
        if deps < prov["deps"]:
            reasons.append(f"Dependency score {deps:.1f} below minimum threshold ({prov['deps']})")
        if overall < prov["overall"]:
            reasons.append(f"Overall score {overall:.1f} below minimum threshold ({prov['overall']})")

    # Governance check tally is reported for context, not used in scoring.
    gov_passed = sum(1 for c in governance_report.checks if c.present)
    gov_total = len(governance_report.checks)

    return CertificationResult(
        decision=decision,
        code_score=code,
        governance_score=gov,
        dep_score=deps,
        overall=overall,
        findings_count=len(findings),
        governance_checks_passed=gov_passed,
        governance_checks_total=gov_total,
        reasons=reasons,
    )

tests/test_certify.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
"""Tests for certify — full certification workflow."""
2+
3+
from arbiter.analyzers.base import Finding
4+
from arbiter.scoring import RepoScore
5+
from arbiter.certify import certify, CertificationResult
6+
7+
8+
class _FakeGovReport:
9+
def __init__(self, score, checks_passed=8, checks_total=10):
10+
self.score = score
11+
self.checks = [type('C', (), {'present': True})()] * checks_passed + \
12+
[type('C', (), {'present': False})()] * (checks_total - checks_passed)
13+
14+
15+
class _FakeDepReport:
16+
def __init__(self, score):
17+
self.score = score
18+
19+
20+
def _score(overall=90, lint=92, sec=88, cx=90):
    """Build a scorable RepoScore fixture with adjustable dimension values."""
    return RepoScore(
        overall=overall,
        lint_score=lint,
        security_score=sec,
        complexity_score=cx,
        total_findings=10,
    )
23+
24+
25+
class TestCertify:
    """Behavioral tests for the certification decision logic."""

    def test_certified(self):
        # High marks on every dimension clear all certified-tier floors.
        res = certify(_score(90), _FakeGovReport(80), _FakeDepReport(85), [])
        assert res.decision == "CERTIFIED"
        assert res.passed

    def test_provisional(self):
        # Mid-range scores clear provisional floors but not certified ones.
        res = certify(_score(70), _FakeGovReport(55), _FakeDepReport(65), [])
        assert res.decision == "PROVISIONAL"
        assert res.passed

    def test_failed(self):
        res = certify(_score(30), _FakeGovReport(20), _FakeDepReport(30), [])
        assert res.decision == "FAILED"
        assert not res.passed

    def test_failed_has_reasons(self):
        res = certify(_score(30), _FakeGovReport(20), _FakeDepReport(30), [])
        assert len(res.reasons) > 0

    def test_overall_weighted(self):
        # 50/30/20 weighting of the three dimensions.
        res = certify(_score(80), _FakeGovReport(60), _FakeDepReport(70), [])
        expected = 80 * 0.5 + 60 * 0.3 + 70 * 0.2  # 72
        assert abs(res.overall - expected) < 0.2

    def test_summary_includes_decision(self):
        res = certify(_score(90), _FakeGovReport(80), _FakeDepReport(85), [])
        assert "CERTIFIED" in res.summary()

    def test_provisional_gives_reasons(self):
        res = certify(_score(70), _FakeGovReport(50), _FakeDepReport(60), [])
        assert res.decision == "PROVISIONAL"
        assert any("below certified" in r for r in res.reasons)

    def test_governance_checks_counted(self):
        res = certify(_score(90), _FakeGovReport(80, 7, 10), _FakeDepReport(85), [])
        assert res.governance_checks_passed == 7
        assert res.governance_checks_total == 10

    def test_non_scorable_code(self):
        # An unscorable repo contributes 0 for code, dragging overall to FAILED.
        unscorable = RepoScore(overall=0, lint_score=0, security_score=0,
                               complexity_score=0, total_findings=0, is_scorable=False)
        res = certify(unscorable, _FakeGovReport(80), _FakeDepReport(85), [])
        assert res.code_score == 0
        assert res.decision == "FAILED"

0 commit comments

Comments
 (0)