|
| 1 | +#!/usr/bin/env node |
| 2 | + |
| 3 | +/** |
| 4 | + * media-use eval — compare baseline (no media-use) vs. with media-use |
| 5 | + * on real registry blocks. Produces an HTML report. |
| 6 | + */ |
| 7 | + |
import { execFileSync, execSync } from "node:child_process";
import { cpSync, existsSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { basename, dirname, join, resolve } from "node:path";
import { fileURLToPath } from "node:url";
| 13 | + |
// Directory that holds this script; sibling scripts are located relative to it.
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));

// Three directory levels above the script directory.
const REPO_ROOT = resolve(SCRIPT_DIR, "../../..");

// The resolver CLI that the eval shells out to.
const RESOLVE_SCRIPT = join(SCRIPT_DIR, "resolve.mjs");

// Repo-relative paths of the registry blocks the eval runs against.
const TEST_BLOCKS = [
  "nyc-paris-flight",
  "macos-tahoe-liquid-glass",
  "blue-sweater-intro-video",
  "vpn-youtube-spot",
  "apple-money-count",
  "liquid-glass-notification",
  "instagram-follow",
].map((blockName) => `registry/blocks/${blockName}`);
| 27 | + |
/**
 * Execute a shell command synchronously and report the outcome as a plain
 * object instead of throwing.
 *
 * @param {string} cmd - Command line handed to `execSync`.
 * @param {object} [opts] - Extra `execSync` options; they override the
 *   defaults (utf8 encoding, 15 s timeout, piped stdio).
 * @returns {{ok: boolean, output: string, code?: number}} On success the
 *   trimmed stdout; on failure stdout and stderr concatenated plus the exit
 *   status taken from the thrown error.
 */
function run(cmd, opts = {}) {
  const execOptions = { encoding: "utf8", timeout: 15000, stdio: "pipe", ...opts };
  try {
    const stdout = execSync(cmd, execOptions);
    return { ok: true, output: stdout.trim() };
  } catch (failure) {
    const captured = (failure.stdout || "") + (failure.stderr || "");
    return { ok: false, output: captured, code: failure.status };
  }
}
| 35 | + |
/**
 * List every file under `<dir>/assets`, recursing into subdirectories.
 *
 * @param {string} dir - Directory expected to contain an `assets` folder.
 * @returns {{count: number, files: string[]}} File paths relative to the
 *   `assets` folder, joined with "/" (empty when the folder does not exist).
 */
function countAssetFiles(dir) {
  const assetsRoot = join(dir, "assets");
  if (!existsSync(assetsRoot)) {
    return { count: 0, files: [] };
  }

  // Depth-first listing; entries keep the order readdirSync returns them in.
  const listFiles = (folder, prefix) =>
    readdirSync(folder, { withFileTypes: true }).flatMap((entry) => {
      const relPath = prefix ? `${prefix}/${entry.name}` : entry.name;
      return entry.isDirectory() ? listFiles(join(folder, entry.name), relPath) : [relPath];
    });

  const files = listFiles(assetsRoot, "");
  return { count: files.length, files };
}
| 50 | + |
// Refs that cannot be project-local files: any URL scheme (data:, http:, blob:, ...),
// protocol-relative URLs, and pure fragment references such as url(#gradient).
const NON_LOCAL_REF = /^(?:[a-z][a-z0-9+.-]*:|\/\/|#)/i;

/** Parse JSON, returning null instead of throwing on malformed input. */
function parseJsonOrNull(text) {
  try {
    return JSON.parse(text);
  } catch {
    return null;
  }
}

/**
 * Invoke resolve.mjs with an argument vector. No shell is involved, so values
 * read from the manifest (type, prompt) cannot break quoting or inject commands.
 */
function runResolveScript(args) {
  try {
    const stdout = execFileSync(process.execPath, [RESOLVE_SCRIPT, ...args], {
      encoding: "utf8",
      timeout: 15000,
      stdio: "pipe",
    });
    return { ok: true, output: stdout.trim() };
  } catch (err) {
    return { ok: false, output: (err.stdout || "") + (err.stderr || ""), code: err.status };
  }
}

/** Reduce an asset reference or manifest path to a comparable form: no query/hash, no leading "./". */
function normalizeAssetRef(ref) {
  return ref.split(/[?#]/, 1)[0].replace(/^(?:\.\/)+/, "");
}

/** Collect local `src="..."` and `url(...)` references from the given HTML files. */
function collectAssetRefs(projectDir, htmlFiles) {
  const patterns = [/src=["']([^"']+?)["']/g, /url\(["']?([^"')]+?)["']?\)/g];
  const refs = [];
  for (const composition of htmlFiles) {
    const html = readFileSync(join(projectDir, composition), "utf8");
    for (const pattern of patterns) {
      for (const [, ref] of html.matchAll(pattern)) {
        if (NON_LOCAL_REF.test(ref)) continue;
        refs.push({ composition, ref });
      }
    }
  }
  return refs;
}

/**
 * Evaluate one registry block: copy it to a temp directory, record the raw
 * asset listing, run `resolve.mjs --adopt`, then probe one resolve that should
 * match an adopted asset and one that should miss.
 *
 * @param {string} blockPath - Block path relative to REPO_ROOT.
 * @returns {object|null} null when the block directory does not exist;
 *   otherwise `{ name, baseline, compositions, assetRefs, adopted, index,
 *   manifest, resolveTest, resolveMiss }`. `resolveTest` / `resolveMiss` are
 *   the parsed JSON outputs, or null when unavailable.
 */
function evalBlock(blockPath) {
  const fullPath = join(REPO_ROOT, blockPath);
  if (!existsSync(fullPath)) return null;

  const name = basename(blockPath);
  const tmp = mkdtempSync(join(tmpdir(), `mu-eval-${name}-`));

  try {
    // Work on a copy so --adopt never writes into the registry itself.
    cpSync(fullPath, tmp, { recursive: true });

    // baseline: what the agent sees WITHOUT media-use
    const baseline = countAssetFiles(tmp);
    const htmlFiles = readdirSync(tmp).filter((f) => f.endsWith(".html"));

    // parse compositions for asset references
    const assetRefs = collectAssetRefs(tmp, htmlFiles);

    // with media-use: run --adopt
    const adoptResult = runResolveScript(["--adopt", "--project", tmp, "--json"]);
    const adoptJson = adoptResult.ok ? parseJsonOrNull(adoptResult.output) : null;
    // Guard the shape: unexpected JSON must not leak undefined/NaN into the report.
    const adoptedCount = Number.isFinite(adoptJson?.adopted) ? adoptJson.adopted : 0;
    const adoptedAssets = Array.isArray(adoptJson?.assets) ? adoptJson.assets : [];

    // read the generated index
    const indexPath = join(tmp, ".media", "index.md");
    const indexContent = existsSync(indexPath) ? readFileSync(indexPath, "utf8") : "(no index generated)";

    // read manifest for detail; unparseable or non-object lines are dropped
    const manifestPath = join(tmp, ".media", "manifest.jsonl");
    const manifest = existsSync(manifestPath)
      ? readFileSync(manifestPath, "utf8")
          .trim()
          .split("\n")
          .map(parseJsonOrNull)
          .filter((record) => record !== null && typeof record === "object")
      : [];

    // test resolve cache hit: try resolving something that was adopted
    let resolveTest = null;
    if (manifest.length > 0) {
      const first = manifest[0];
      const intent = first.provenance?.prompt || first.description;
      // Without a type or intent there is nothing meaningful to resolve;
      // skip rather than query for the literal string "undefined".
      if (first.type && intent) {
        const hit = runResolveScript([
          "--type", String(first.type),
          "--intent", String(intent),
          "--project", tmp,
          "--json",
        ]);
        if (hit.ok) resolveTest = parseJsonOrNull(hit.output);
      }
    }

    // test resolve miss: try resolving something that doesn't exist.
    // Only a failing exit is inspected; its output is parsed as JSON.
    const missResult = runResolveScript([
      "--type", "bgm",
      "--intent", "nonexistent query xyz",
      "--project", tmp,
      "--json",
    ]);
    const resolveMiss = missResult.ok ? null : parseJsonOrNull(missResult.output);

    // coverage: which composition refs are covered by the manifest.
    // Both sides are normalized so "./assets/a.png?v=1" matches "assets/a.png".
    const manifestPaths = new Set(
      manifest
        .map((record) => record.path)
        .filter((p) => typeof p === "string")
        .map(normalizeAssetRef),
    );
    const coverage = assetRefs.map((r) => ({
      ...r,
      covered: manifestPaths.has(normalizeAssetRef(r.ref)),
    }));

    return {
      name,
      baseline: { fileCount: baseline.count, files: baseline.files, htmlCount: htmlFiles.length },
      compositions: htmlFiles,
      assetRefs: coverage,
      adopted: { count: adoptedCount, assets: adoptedAssets },
      index: indexContent,
      manifest,
      resolveTest,
      resolveMiss,
    };
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
}
| 140 | + |
/**
 * Render the eval results as a standalone HTML document.
 *
 * Every value that originates from the evaluated blocks (names, file paths,
 * manifest fields, index text) is HTML-escaped before interpolation.
 *
 * @param {Array<object|null>} results - One entry per block as returned by
 *   evalBlock; null entries (skipped or failed blocks) are ignored.
 * @returns {string} Complete HTML page.
 */
function generateReport(results) {
  // Minimum number of blocks with adopted assets for the "ship" verdict.
  const SHIP_THRESHOLD = 3;

  const all = results.filter(Boolean);
  // A missing or non-numeric count is treated as 0 so totals never become NaN.
  const adoptedCount = (r) => Number(r.adopted.count) || 0;
  const hasMetadata = (r) => r.manifest.some((m) => m.duration || m.width);
  const isCacheHit = (r) => r.resolveTest?._source === "cached";
  const isMissHandled = (r) => r.resolveMiss?.ok === false;

  const passed = all.filter((r) => adoptedCount(r) > 0);
  const withMetadata = all.filter(hasMetadata).length;
  const cacheHits = all.filter(isCacheHit).length;
  const missesHandled = all.filter(isMissHandled).length;
  const totalAdopted = all.reduce((sum, r) => sum + adoptedCount(r), 0);

  const allRefs = all.flatMap((r) => r.assetRefs);
  const overallCoverage = allRefs.length > 0
    ? `${Math.round((allRefs.filter((c) => c.covered).length / allRefs.length) * 100)}%`
    : "—";

  const rows = all
    .map((r) => `<tr>
<td><strong>${escapeHtml(r.name)}</strong></td>
<td>${r.baseline.fileCount} files, ${r.baseline.htmlCount} comp${r.baseline.htmlCount === 1 ? "" : "s"}</td>
<td>${adoptedCount(r)} adopted</td>
<td>${hasMetadata(r) ? "<span class='pass'>with metadata</span>" : "<span class='warn'>no metadata</span>"}</td>
<td>${isCacheHit(r) ? "<span class='pass'>cache hit</span>" : "<span class='warn'>no hit</span>"}</td>
<td>${isMissHandled(r) ? "<span class='pass'>handled</span>" : "<span class='fail'>unexpected</span>"}</td>
</tr>`)
    .join("\n");

  const details = passed
    .map((r) => {
      const assetRows = r.manifest
        .map((m) => {
          const dur = m.duration != null ? `${m.duration}s` : "—";
          const dims = m.width && m.height ? `${m.width}×${m.height}` : "—";
          return `<tr><td>${escapeHtml(m.id)}</td><td>${escapeHtml(m.type)}</td><td>${escapeHtml(dur)}</td><td>${escapeHtml(dims)}</td><td class="path">${escapeHtml(m.path)}</td><td>${escapeHtml(m.description || "")}</td></tr>`;
        })
        .join("\n");

      const coveredCount = r.assetRefs.filter((c) => c.covered).length;
      const totalRefs = r.assetRefs.length;
      const coveragePct = totalRefs > 0 ? Math.round((coveredCount / totalRefs) * 100) : 100;

      const refRows = r.assetRefs
        .map((c) => `<tr><td class="path">${escapeHtml(c.composition)}</td><td class="path">${escapeHtml(c.ref)}</td><td>${c.covered ? "<span class='pass'>covered</span>" : "<span class='warn'>not in manifest</span>"}</td></tr>`)
        .join("\n");

      return `<div class="block-detail">
<h3>${escapeHtml(r.name)}</h3>
<p style="font-size:13px;color:var(--muted)">${r.compositions.length} composition${r.compositions.length === 1 ? "" : "s"}: ${escapeHtml(r.compositions.join(", "))}</p>

<div class="comparison">
<div class="col">
<h4>Baseline (no media-use)</h4>
<p>Agent sees: ${r.baseline.fileCount} raw files in assets/<br>No metadata, no type info, no relationship to compositions.</p>
<pre class="file-list">${escapeHtml(r.baseline.files.join("\n") || "(no assets)")}</pre>
</div>
<div class="col">
<h4>With media-use (after --adopt)</h4>
<p>Agent reads index.md — structured, typed, with metadata:</p>
<pre class="index">${escapeHtml(r.index)}</pre>
</div>
</div>

${totalRefs > 0 ? `<h4>Composition → asset coverage <span class="${coveragePct === 100 ? "pass" : "warn"}">${coveragePct}%</span> (${coveredCount}/${totalRefs} refs)</h4>
<table class="manifest">
<thead><tr><th>composition</th><th>asset reference</th><th>in manifest?</th></tr></thead>
<tbody>${refRows}</tbody>
</table>` : ""}

<h4>Manifest records</h4>
<table class="manifest">
<thead><tr><th>id</th><th>type</th><th>dur</th><th>dims</th><th>path</th><th>description</th></tr></thead>
<tbody>${assetRows}</tbody>
</table>
</div>`;
    })
    .join("\n");

  // The verdict states the measured numbers rather than asserting success.
  const ship = passed.length >= SHIP_THRESHOLD;
  const verdict = ship
    ? `<strong>Ship it.</strong> ${passed.length}/${all.length} blocks adopted assets (${withMetadata} with metadata). Resolve cache hits: ${cacheHits}/${all.length}. Misses handled: ${missesHandled}/${all.length}.`
    : `<strong>Needs work.</strong> Only ${passed.length} blocks adopted. Check the failures above.`;

  // The explicit doctype and charset keep the non-ASCII characters in the
  // report (—, ×, ·, →) from being mis-decoded when the file is opened locally.
  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>media-use eval report</title>
<style>
:root { --bg: #fafaf7; --text: #1b1b18; --muted: #7a756a; --accent: #0d7377; --good: #1a7a3a; --warn: #b45309; --fail: #dc2626; --border: #e8e5df; --surface: #fff; --mono: ui-monospace, 'SF Mono', Menlo, Consolas, monospace; --sans: system-ui, -apple-system, sans-serif; --serif: Georgia, serif }
* { box-sizing: border-box; margin: 0 } body { background: var(--bg); color: var(--text); font-family: var(--serif); line-height: 1.6; font-size: 15px; padding: 40px 24px }
.wrap { max-width: 1100px; margin: 0 auto }
h1 { font-family: var(--sans); font-size: 28px; font-weight: 700; margin-bottom: 8px; letter-spacing: -.02em }
h2 { font-family: var(--sans); font-size: 20px; font-weight: 650; margin: 32px 0 12px; letter-spacing: -.01em }
h3 { font-family: var(--sans); font-size: 17px; font-weight: 650; margin: 24px 0 8px }
h4 { font-family: var(--sans); font-size: 14px; font-weight: 600; margin: 16px 0 6px; color: var(--muted) }
p { margin-bottom: 10px }
.meta { font-family: var(--mono); font-size: 12px; color: var(--muted); margin-bottom: 24px }
.summary { display: flex; gap: 16px; margin: 16px 0; flex-wrap: wrap }
.stat { background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 14px 18px; flex: 1; min-width: 140px }
.stat .num { font-family: var(--sans); font-size: 28px; font-weight: 700; color: var(--accent) }
.stat .label { font-family: var(--mono); font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: .1em }
table { width: 100%; border-collapse: collapse; font-size: 13px; font-family: var(--sans); margin: 8px 0 }
th { text-align: left; font-family: var(--mono); font-size: 10px; letter-spacing: .08em; text-transform: uppercase; color: var(--muted); border-bottom: 2px solid var(--border); padding: 6px 8px; font-weight: 700 }
td { border-bottom: 1px solid var(--border); padding: 7px 8px; vertical-align: top }
td.path { font-family: var(--mono); font-size: 12px; color: var(--muted); max-width: 300px; overflow: hidden; text-overflow: ellipsis }
.pass { color: var(--good); font-weight: 600 } .warn { color: var(--warn); font-weight: 600 } .fail { color: var(--fail); font-weight: 600 }
.comparison { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin: 12px 0 }
@media(max-width:700px) { .comparison { grid-template-columns: 1fr } }
.col { background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 14px 16px }
.col h4 { margin-top: 0 }
pre { font-family: var(--mono); font-size: 12px; background: #1b1b18; color: #d4d0c8; border-radius: 6px; padding: 12px 14px; overflow-x: auto; margin: 6px 0; line-height: 1.5 }
pre.file-list { background: var(--bg); color: var(--muted); border: 1px solid var(--border) }
pre.index { white-space: pre; }
.block-detail { border-top: 1px solid var(--border); padding-top: 20px; margin-top: 20px }
.verdict { margin-top: 24px; padding: 16px 20px; border-radius: 8px; font-family: var(--sans); font-size: 15px }
.verdict.ship { background: #edfbf0; border: 1px solid #1a7a3a; color: #1a7a3a }
.verdict.wait { background: #fff3ec; border: 1px solid #d94f04; color: #d94f04 }
</style>
</head>
<body>
<div class="wrap">
<h1>media-use eval report</h1>
<p class="meta">${new Date().toISOString().slice(0, 10)} · ${all.length} blocks evaluated · baseline vs. media-use --adopt</p>

<div class="summary">
<div class="stat"><div class="num">${all.length}</div><div class="label">blocks tested</div></div>
<div class="stat"><div class="num">${passed.length}</div><div class="label">with assets</div></div>
<div class="stat"><div class="num">${totalAdopted}</div><div class="label">assets adopted</div></div>
<div class="stat"><div class="num">${withMetadata}</div><div class="label">with ffprobe metadata</div></div>
<div class="stat"><div class="num">${overallCoverage}</div><div class="label">composition coverage</div></div>
</div>

<h2>Results matrix</h2>
<table>
<thead><tr><th>Block</th><th>Baseline</th><th>Adopted</th><th>Metadata</th><th>Cache hit</th><th>Miss handling</th></tr></thead>
<tbody>${rows}</tbody>
</table>

<h2>Before / after comparisons</h2>
${details}

<div class="verdict ${ship ? "ship" : "wait"}">
${verdict}
</div>
</div>
</body>
</html>`;
}

/**
 * Escape text for safe inclusion in HTML element content or attribute values.
 *
 * @param {unknown} str - Value to escape; null/undefined become "", anything
 *   else is converted with String().
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(str) {
  return String(str ?? "")
    .replaceAll("&", "&amp;") // must run first so later entities are not double-escaped
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&#39;");
}
| 279 | + |
// Driver: evaluate every configured block, then write the HTML report next to
// the script's parent directory.
console.log("media-use eval · running against registry blocks...\n");

const results = [];
for (const block of TEST_BLOCKS) {
  const label = basename(block);
  if (!existsSync(join(REPO_ROOT, block))) {
    console.log(` skip ${label} (not found)`);
    results.push(null);
    continue;
  }

  process.stdout.write(` ${label}...`);
  let result = null;
  try {
    result = evalBlock(block);
  } catch (err) {
    // One broken block must not prevent the report for the others; record the
    // failure and keep a non-zero exit status so the run is still flagged.
    console.log(` failed: ${err?.message ?? err}`);
    process.exitCode = 1;
    results.push(null);
    continue;
  }

  if (result) {
    const withMetadata = result.manifest.filter((m) => m.duration || m.width).length;
    console.log(` ${result.adopted.count} adopted, ${withMetadata} with metadata`);
  } else {
    console.log(" failed");
  }
  results.push(result);
}

const report = generateReport(results);
const outPath = join(SCRIPT_DIR, "..", "eval-report.html");
writeFileSync(outPath, report);
console.log(`\nReport: ${outPath}`);
0 commit comments