|
| 1 | +{ |
| 2 | + "id": "aether-arena-aa", |
| 3 | + "name": "AetherArena (AA) — Official Spatial-Intelligence Benchmark", |
| 4 | + "adr": "ADR-149", |
| 5 | + "adrPath": "docs/adr/ADR-149-public-community-leaderboard-huggingface.md", |
| 6 | + "status": "Accepted", |
| 7 | + "initializedDate": "2026-05-30", |
| 8 | + "targetDate": "2026-08-31", |
| 9 | + "exitCriteria": "Benchmark INFRASTRUCTURE done, tested, CI-gated, deploy-ready: aa_score_runner.rs passes deterministic fixture test; CI harness-gate green on every PR; aether-arena repo scaffold committed (README four-part framing + aa-submission.toml schema + VERIFY.md); public smoke split committed; HF Space lifecycle skeleton deployed; signed Parquet ledger functional; RuView baseline PCK@20 ~2.5% entered; ADR-149 §7 acceptance test (five-step stranger test) passes. NOTE: ML SOTA (MM-Fi PCK@20 ~72%) is a separate long-running stretch goal blocked on ADR-079 camera-ground-truth — it is NOT an infra exit criterion.", |
| 10 | + "baselineState": { |
| 11 | + "adrStatus": "Accepted, committed 2026-05-30", |
| 12 | + "scorerCode": "ruview_metrics.rs + ablation.rs + proof.rs exist in wifi-densepose-train; aa_score_runner.rs not yet created", |
| 13 | + "aetherArenaRepo": "does not exist yet — needs user authorization to create ruvnet/aether-arena public repo", |
| 14 | + "hfSpace": "does not exist yet — needs HF_TOKEN and user authorization to deploy ruvnet/aether-arena HF Space", |
| 15 | + "smokeDataset": "not committed", |
| 16 | + "resultsLedger": "not created", |
| 17 | + "ruviewBaseline": "PCK@20 ~2.5% self-reported, not formally entered", |
| 18 | + "ciGate": "not added to workflow" |
| 19 | + }, |
| 20 | + "milestones": { |
| 21 | + "m1": { |
| 22 | + "name": "ADR-149 Accepted + committed", |
| 23 | + "status": "DONE", |
| 24 | + "completedDate": "2026-05-30", |
| 25 | + "completionCriteria": "ADR-149 file committed to docs/adr/ with status Accepted", |
| 26 | + "notes": "Completed during the 2026-05-30 initialization session. File at docs/adr/ADR-149-public-community-leaderboard-huggingface.md" |
| 27 | + }, |
| 28 | + "m2": { |
| 29 | + "name": "Deterministic scorer runner bin (aa_score_runner.rs)", |
| 30 | + "status": "NOT_STARTED", |
| 31 | + "completionCriteria": "aa_score_runner.rs compiles, runs ruview_metrics on a committed fixture, emits RuViewTier + SHA-256 proof hash, mirrors existing *_proof_runner.rs pattern; cargo test passes", |
| 32 | + "estimatedEffort": "3-5 days", |
| 33 | + "owner": "wifi-densepose-train crate or new aa-scorer crate" |
| 34 | + }, |
| 35 | + "m3": { |
| 36 | + "name": "CI harness-gate: GitHub Actions workflow", |
| 37 | + "status": "NOT_STARTED", |
| 38 | + "completionCriteria": "A GitHub Actions workflow runs aa_score_runner on every PR as a build gate; PR fails if scorer fails determinism check; workflow committed and green", |
| 39 | + "estimatedEffort": "2-3 days", |
| 40 | + "dependency": "M2 must be done first" |
| 41 | + }, |
| 42 | + "m4": { |
| 43 | + "name": "aether-arena repo scaffold", |
| 44 | + "status": "NOT_STARTED", |
| 45 | + "completionCriteria": "ruvnet/aether-arena repo created with: README (four-part framing: Public leaderboard / Private eval split / Open scorer / Signed results); aa-submission.toml manifest schema; VERIFY.md (ADR-149 §7 stranger acceptance test); neutrality/governance section (§2.8); contribution guide", |
| 46 | + "estimatedEffort": "3-5 days", |
| 47 | + "blockers": ["Needs user authorization to create public ruvnet/aether-arena repo on GitHub"] |
| 48 | + }, |
| 49 | + "m5": { |
| 50 | + "name": "Public smoke split committed + private MM-Fi held-out split prep", |
| 51 | + "status": "NOT_STARTED", |
| 52 | + "completionCriteria": "Public smoke split committed to aether-arena repo (stranger can score locally); private MM-Fi held-out split prepared under non-public path with CC BY-NC 4.0 attribution; Wi-Pose explicitly excluded from v0", |
| 53 | + "estimatedEffort": "5-7 days", |
| 54 | + "riskNotes": "MM-Fi CC BY-NC 4.0: AA must remain non-commercial and carry MM-Fi attribution; raw frames stay in private split; only derived CSI features + scores may be exposed" |
| 55 | + }, |
| 56 | + "m6": { |
| 57 | + "name": "HF Space (Gradio) skeleton", |
| 58 | + "status": "BLOCKED", |
| 59 | + "completionCriteria": "HF Space deployed at ruvnet/aether-arena with submission lifecycle (submitted->validated->quarantined->smoke_scored->full_scored->published/rejected); sandboxed scorer container wired; basic leaderboard table rendered", |
| 60 | + "estimatedEffort": "7-10 days", |
| 61 | + "blockers": [ |
| 62 | + "Needs HF_TOKEN — check .env for HF_TOKEN or HUGGINGFACE_TOKEN", |
| 63 | + "Needs user authorization to create/deploy ruvnet/aether-arena HF Space (outward-facing public deployment)" |
| 64 | + ] |
| 65 | + }, |
| 66 | + "m7": { |
| 67 | + "name": "Signed append-only Parquet results ledger", |
| 68 | + "status": "NOT_STARTED", |
| 69 | + "completionCriteria": "HF dataset ruvnet/aether-arena-results created; append-only Parquet ledger with signed rows; determinism_gate enforced; no row can be silently edited", |
| 70 | + "estimatedEffort": "3-5 days", |
| 71 | + "ledgerSchema": "submitter, model_ref, category, feature_set, tier, pck20, oks, mota, vitals_bpm_err, latency_p50, latency_p95, privacy_leakage, cross_room_deg, proof_sha256, scored_at, harness_version", |
| 72 | + "dependency": "M6 must be scaffolded first" |
| 73 | + }, |
| 74 | + "m8": { |
| 75 | + "name": "RuView baseline entry + public launch", |
| 76 | + "status": "NOT_STARTED", |
| 77 | + "completionCriteria": "RuView wifi-densepose-pretrained baseline entered (honest PCK@20 ~2.5%); ADR-149 §7 five-step stranger acceptance test passes; v0 live with Presence + Pose + Edge-latency + Determinism categories active; Privacy and Cross-room shown as gated/coming-soon", |
| 78 | + "estimatedEffort": "3-5 days", |
| 79 | + "dependency": "M4+M5+M6+M7 complete", |
| 80 | + "notes": "ML SOTA improvement (PCK@20 ~72%) is a SEPARATE stretch goal blocked on ADR-079 P7-P9 camera ground truth. NOT a blocker for infra launch." |
| 81 | + } |
| 82 | + }, |
| 83 | + "activeMilestone": "m2", |
| 84 | + "completedMilestones": ["m1"], |
| 85 | + "knownRisks": [ |
| 86 | + "HF_TOKEN not confirmed present in .env — check before M6 work begins", |
| 87 | + "ruvnet/aether-arena public repo creation is outward-facing — needs explicit user authorization", |
| 88 | + "MM-Fi CC BY-NC 4.0: AA must stay legally non-commercial and brand-distinct from the commercial RuView product, or seek an MM-Fi commercial grant before any paid tier", |
| 89 | + "Wi-Pose has research-use-only terms (no redistribution grant) — excluded from v0; revisit only if terms are clarified with authors", |
| 90 | + "HF Space free CPU tier may be too slow for Candle/tch inference pipeline — may need ZeroGPU or self-hosted scorer on cognitum-20260110 GCloud A100/L4", |
| 91 | + "ADR-079 camera-ground-truth (PCK@20 SOTA) is P7-P9 pending — NOT an infra blocker; must not be conflated with AA infra completion", |
| 92 | + "Neutrality/governance risk: RuView seeded the scorer — must be demonstrably scored through the same public pipeline as any other entrant (§2.8 controls)" |
| 93 | + ], |
| 94 | + "driftSignals": { |
| 95 | + "timeline": "GREEN — just initialized, no timeline pressure yet", |
| 96 | + "scope": "GREEN — scope locked at four-part structure per ADR-149 §2 decision", |
| 97 | + "approach": "GREEN — reuse pattern (existing ruview_metrics + proof.rs) confirmed in ADR-149", |
| 98 | + "dependency": "YELLOW — HF_TOKEN and ruvnet/aether-arena repo authorization are external blockers with unknown ETA", |
| 99 | + "priority": "GREEN — active feature branch feat/adr-136-146-streaming-engine in progress; AA infra can proceed in parallel on its own branch" |
| 100 | + }, |
| 101 | + "stretchGoals": { |
| 102 | + "sotaML": "MM-Fi PCK@20 SOTA ~72% — separate ML effort blocked on ADR-079 P7-P9 camera-ground-truth data collection; NOT an infra exit criterion", |
| 103 | + "privacyAxis": "ADR-145 §10 membership-inference attacker — activate Privacy leaderboard axis once attacker is implemented and published", |
| 104 | + "crossRoom": "Multi-room held-out split — activate Cross-room generalization axis", |
| 105 | + "multiOrgSteering": "Invite co-maintainers from other projects once at least N external entries land (threshold N is not yet specified in this record)" |
| 106 | + }, |
| 107 | + "sessionHistory": [ |
| 108 | + { |
| 109 | + "date": "2026-05-30", |
| 110 | + "type": "initialization", |
| 111 | + "accomplished": [ |
| 112 | + "ADR-149 Accepted and committed to docs/adr/", |
| 113 | + "Horizon record initialized in .claude-flow/horizons/aether-arena-aa.json", |
| 114 | + "Memory stored in horizons namespace under key horizon-aether-arena-aa", |
| 115 | + "Session check-in record stored in horizon-sessions namespace" |
| 116 | + ] |
| 117 | + } |
| 118 | + ] |
| 119 | +} |
0 commit comments