Skip to content

Commit b56b7c5

Browse files
committed
feat(cli): capture-video on-demand fetcher + capture pipeline robustness
This change supports the hyperframes.dev website-to-video flow. Real-AI-test runs against heygen.com, huly.io, and heygen-showcase surfaced two gaps: (1) capture's logo / asset-captioning signals missed modern React/Tailwind builds; and (2) there was no CLI surface to pull the videos the manifest references. New command: • `hyperframes capture-video <project>` — on-demand downloader for entries in capture/extracted/video-manifest.json. Capture writes the manifest + preview PNGs but skips the mp4s; this command pulls one entry by `--index N` (matched against the entry's `index` field, NOT array offset — gaps are possible when a preview screenshot fails). SSRF-safe via safeFetch, 250 MB cap, content-type whitelist, race-free exclusive-create write. Layout-aware (handles both standalone capture and W2H project layouts). Capture pipeline fixes: • Structural logo signals (assetCataloger + tokenExtractor): inBanner / inHomeLink / matchesTitleBrand. Class-substring alone caught 0/32 SVGs on heygen.com — modern builds don't put 'logo' / 'brand' in any className. • Content-hash SVG slugs (assetDownloader): `svg-<8char-sha1>.svg`, or `logo-<8char-sha1>.svg` when a logo signal fires — label-derived slugs mis-attributed partner-logo carousels (heygen-logo.svg actually contained Google, hubspot-logo.svg contained Trivago, etc.). Content-hash names are invariant by construction. • SVG → PNG rasterization before Gemini Vision (contentExtractor): the raw-SVG-as-text path was hallucinating wordmarks (VIVIENNE for HubSpot, 'wrestling' for Workday). Adds polarity detection so a white-glyph SVG, which would otherwise flatten to a blank PNG on a white background, is flattened onto a dark background before captioning. LOGO tag in asset-descriptions.md when structural signals fire (independent of Gemini key presence). • Double-escape \/ inside the page.evaluate template literal in assetCataloger + tokenExtractor: each `\/` in the original `/^https?:\/\/.../` collapsed to a bare `/` inside the template literal, and the injected script threw `Unexpected token ^`. Capture was 100% blocked on this until the escape was fixed. 
• `asset-descriptions.md` header branches on Gemini-key presence with an explicit 'Vision OFF — catalog-derived descriptions' warning. New lint rule: • `lintMissingLocalAsset` (cli/utils/lintProject): scans <video> / <img> / <source> src for local files that don't exist in the project. Empirically the most common sub-agent mistake across multi-URL runs (~5+ per run). Uses `resolveExistingLocalAsset` so the existence check matches the bundler's notion of 'resolves'. Masks comment / style / script ranges before scanning so a literal `<img src=missing.png>` inside a tutorial comment isn't reported. Tests: 17 new for capture-video (safeFilename decoding/sanitization, VIDEO_CONTENT_TYPE_RE accept/reject, pickManifestEntry index-field lookup with gaps, URL-mismatch + bad-index rejection, --index over --url priority); 70 cases under lintProject.test.ts covering the new rule and existing rules. Sibling PRs in this stack: • #PR_A1 — fix(producer): __dirname ESM banner shim • #PR_A2 — fix(core/lint): findRootTag masks comment/style/script
1 parent 211e0ad commit b56b7c5

11 files changed

Lines changed: 787 additions & 29 deletions

File tree

packages/cli/src/capture/assetCataloger.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ export interface CatalogedAsset {
2525
sectionClasses?: string;
2626
/** Whether the image is above the fold (visible without scrolling) */
2727
aboveFold?: boolean;
28+
/** Element sits inside <header>, <nav>, or [role="banner"] — logo signal */
29+
inBanner?: boolean;
30+
/** Element sits inside <a> with site-root href ("/", "#", origin-only) — brand-home link */
31+
inHomeLink?: boolean;
32+
/** alt/aria-label/title contains the brand segment of document.title */
33+
matchesTitleBrand?: boolean;
2834
}
2935

3036
/**
@@ -62,6 +68,19 @@ export async function catalogAssets(page: Page): Promise<CatalogedAsset[]> {
6268
var rect = el.getBoundingClientRect();
6369
ctx.aboveFold = rect.top < window.innerHeight;
6470
} catch(e) {}
71+
// Structural logo-candidate signals: class-substring alone caught 0/32 SVGs on heygen.com.
72+
ctx.inBanner = el.closest('header, nav, [role="banner"]') !== null;
73+
var homeAnchor = el.closest('a[href]');
74+
if (homeAnchor) {
75+
var aHref = homeAnchor.getAttribute('href') || '';
76+
ctx.inHomeLink = aHref === '/' || aHref === '#' || aHref === './' ||
77+
/^https?:\\/\\/[^/]+\\/?$/.test(aHref);
78+
}
79+
var titleBrand = (document.title || '').split(/[-|—]/)[0].trim();
80+
if (desc && titleBrand.length > 1 && titleBrand.length < 30 &&
81+
desc.toLowerCase().indexOf(titleBrand.toLowerCase()) !== -1) {
82+
ctx.matchesTitleBrand = true;
83+
}
6584
return ctx;
6685
}
6786
@@ -92,12 +111,15 @@ export async function catalogAssets(page: Page): Promise<CatalogedAsset[]> {
92111
if (notes && !entry.notes) {
93112
entry.notes = notes;
94113
}
95-
// Merge rich context (first one wins)
114+
// Text fields: first-occurrence wins. Boolean signals: any positive sample wins.
96115
if (richCtx) {
97116
if (richCtx.description && !entry.description) entry.description = richCtx.description;
98117
if (richCtx.nearestHeading && !entry.nearestHeading) entry.nearestHeading = richCtx.nearestHeading;
99118
if (richCtx.sectionClasses && !entry.sectionClasses) entry.sectionClasses = richCtx.sectionClasses;
100119
if (richCtx.aboveFold !== undefined && entry.aboveFold === undefined) entry.aboveFold = richCtx.aboveFold;
120+
if (richCtx.inBanner) entry.inBanner = true;
121+
if (richCtx.inHomeLink) entry.inHomeLink = true;
122+
if (richCtx.matchesTitleBrand) entry.matchesTitleBrand = true;
101123
}
102124
}
103125
@@ -324,6 +346,9 @@ function deduplicateSrcsetVariants(assets: CatalogedAsset[]): CatalogedAsset[] {
324346
if (a.notes && !existing.notes) {
325347
existing.notes = a.notes;
326348
}
349+
if (a.inBanner) existing.inBanner = true;
350+
if (a.inHomeLink) existing.inHomeLink = true;
351+
if (a.matchesTitleBrand) existing.matchesTitleBrand = true;
327352
// Keep the URL with highest w= value (largest image)
328353
const existingW = getWidthParam(existing.url);
329354
const newW = getWidthParam(a.url);

packages/cli/src/capture/assetDownloader.ts

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,16 @@
77

88
import { writeFileSync, mkdirSync } from "node:fs";
99
import { join, extname } from "node:path";
10+
import { createHash } from "node:crypto";
1011
import type { DesignTokens, DownloadedAsset } from "./types.js";
1112
import type { CatalogedAsset } from "./assetCataloger.js";
1213

14+
// Builds a content-addressed slug (no file extension) for an SVG asset.
// Returns `logo-<hash>` when `isLogo` is true and `svg-<hash>` otherwise, where <hash> is
// the first 8 hex characters of the SHA-1 digest of `svgSource`.
// Hash of the bytes — filename cannot drift from content.
// NOTE(review): the inline-SVG caller passes an `outerHTML` string while the download path
// passes the fetched buffer, so the same artwork may hash differently on the two paths —
// confirm that is acceptable.
15+
function svgContentHashSlug(svgSource: string | Buffer, isLogo: boolean): string {
16+
const hash = createHash("sha1").update(svgSource).digest("hex").slice(0, 8);
17+
return isLogo ? `logo-${hash}` : `svg-${hash}`;
18+
}
19+
1320
export async function downloadAssets(
1421
tokens: DesignTokens,
1522
outputDir: string,
@@ -22,15 +29,13 @@ export async function downloadAssets(
2229
const assets: DownloadedAsset[] = [];
2330
const downloadedUrls = new Set<string>();
2431

25-
// 1. ALL inline SVGs — save as files (logos get priority naming)
32+
// Inline SVGs → svg-<hash>.svg / logo-<hash>.svg. Label-derived names mis-assigned brands.
2633
mkdirSync(join(outputDir, "assets", "svgs"), { recursive: true });
2734
const usedSvgNames = new Set<string>();
2835
for (let i = 0; i < tokens.svgs.length && i < 30; i++) {
2936
const svg = tokens.svgs[i]!;
3037
if (!svg.outerHTML || svg.outerHTML.length < 50) continue;
31-
const label = svg.label?.replace(/[^a-zA-Z0-9-_ ]/g, "").trim();
32-
let slug = label ? slugify(label) : svg.isLogo ? `logo-${i}` : `icon-${i}`;
33-
// Deduplicate — two SVGs with same aria-label get suffixed
38+
const slug = svgContentHashSlug(svg.outerHTML, !!svg.isLogo);
3439
let finalSlug = slug;
3540
let suffix = 2;
3641
while (usedSvgNames.has(finalSlug)) {
@@ -135,8 +140,24 @@ export async function downloadAssets(
135140
if (result.status !== "fulfilled" || !result.value) continue;
136141
const { url, isPoster, parsedUrl, ext, buffer, catalog } = result.value;
137142
try {
138-
// Generate human-readable name from catalog context
139-
const slug = deriveAssetName(parsedUrl, catalog, isPoster, imgIdx, usedNames);
143+
// SVGs → content-hash names (label-derived was mis-assigning brands). Rasters keep catalog slugs.
144+
let slug: string;
145+
if (ext === ".svg") {
146+
const c = catalog;
147+
const brandRe = /logo|brand|wordmark/i;
148+
const isLogo = !!(
149+
c?.inBanner ||
150+
c?.inHomeLink ||
151+
c?.matchesTitleBrand ||
152+
c?.contexts?.some((s) => brandRe.test(s)) ||
153+
(c?.description && brandRe.test(c.description)) ||
154+
(c?.nearestHeading && brandRe.test(c.nearestHeading)) ||
155+
(c?.sectionClasses && brandRe.test(c.sectionClasses))
156+
);
157+
slug = svgContentHashSlug(buffer, isLogo);
158+
} else {
159+
slug = deriveAssetName(parsedUrl, catalog, isPoster, imgIdx, usedNames);
160+
}
140161
const name = `${slug}${ext}`;
141162
usedNames.add(slug);
142163
const localPath = `assets/${name}`;

packages/cli/src/capture/contentExtractor.ts

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import type { Page } from "puppeteer-core";
1212
import { existsSync, readdirSync, statSync, readFileSync } from "node:fs";
1313
import { join } from "node:path";
14+
import sharp from "sharp";
1415
import type { CatalogedAsset } from "./assetCataloger.js";
1516
import type { DesignTokens } from "./types.js";
1617

@@ -232,7 +233,7 @@ export async function captionImagesWithGemini(
232233
}
233234
progress("design", `${Object.keys(geminiCaptions).length} images captioned with Gemini`);
234235

235-
// Caption SVGs by sending source code as text (vision API rejects image/svg+xml).
236+
// Rasterize SVGs to PNG before captioning — Vision hallucinates wordmarks when reading SVG path text.
236237
const svgFiles: Array<{ file: string; relPath: string }> = [];
237238
const assetsDir = join(outputDir, "assets");
238239
for (const f of readdirSync(assetsDir)) {
@@ -246,30 +247,56 @@ export async function captionImagesWithGemini(
246247
}
247248

248249
if (svgFiles.length > 0) {
249-
progress("design", `Captioning ${svgFiles.length} SVGs via code analysis...`);
250+
progress("design", `Rasterizing + captioning ${svgFiles.length} SVGs via vision API...`);
250251
const SVG_BATCH = 20;
251-
const MAX_SVG_CHARS = 10_000;
252+
const SVG_RENDER_SIZE = 256; // px — enough resolution for Gemini to read wordmarks, small enough to keep payload sub-MB
252253
for (let i = 0; i < svgFiles.length; i += SVG_BATCH) {
253254
const batch = svgFiles.slice(i, i + SVG_BATCH);
254255
const results = await Promise.allSettled(
255256
batch.map(async ({ relPath }) => {
256257
const filePath = join(assetsDir, relPath);
257-
let svgText = readFileSync(filePath, "utf-8");
258-
if (svgText.length > MAX_SVG_CHARS) {
259-
svgText = svgText.slice(0, MAX_SVG_CHARS) + "\n<!-- truncated -->";
258+
let pngBase64: string;
259+
try {
260+
// Flatten against a contrasting background — white-on-white SVGs render invisible to Vision.
261+
const svgSource = readFileSync(filePath, "utf-8");
262+
const lightFillHits = (
263+
svgSource.match(/fill\s*=\s*["'](#fff(fff)?|white|#f[ef][ef])["']/gi) || []
264+
).length;
265+
const darkFillHits = (
266+
svgSource.match(/fill\s*=\s*["'](#000(000)?|black|#[0-3]{6}|#[0-3]{3})["']/gi) || []
267+
).length;
268+
const bg =
269+
lightFillHits > darkFillHits
270+
? { r: 32, g: 32, b: 32 } // dark slate behind light glyphs
271+
: { r: 255, g: 255, b: 255 }; // white behind dark glyphs (default)
272+
const pngBuffer = await sharp(filePath)
273+
.resize({
274+
width: SVG_RENDER_SIZE,
275+
height: SVG_RENDER_SIZE,
276+
fit: "inside",
277+
withoutEnlargement: false,
278+
})
279+
.flatten({ background: bg })
280+
.png()
281+
.toBuffer();
282+
pngBase64 = pngBuffer.toString("base64");
283+
} catch {
284+
// exotic SVG features may break sharp; skip caption rather than block
285+
return { file: relPath, caption: "" };
260286
}
261287
const response = await ai.models.generateContent({
262288
model,
263289
contents: [
264290
{
265291
role: "user",
266292
parts: [
293+
{ inlineData: { mimeType: "image/png", data: pngBase64 } },
267294
{
268295
text:
269-
"This SVG code is from a website. Describe what it renders in ONE short sentence " +
270-
"for a video storyboard. Focus on: what shape/icon/illustration it is, its colors. " +
271-
"Be factual.\n\n" +
272-
svgText,
296+
"Describe this SVG asset rendered from a website in ONE short sentence for a video storyboard. " +
297+
"Focus on: what shape/icon/illustration/wordmark it is, its colors, any text it contains. " +
298+
"If you see a wordmark, READ THE LETTERS LITERALLY — do not guess a brand from context. " +
299+
"Be factual.",
273300
},
274301
],
275302
},
@@ -358,11 +385,6 @@ export function generateAssetDescriptions(
358385
const svgsPath = join(assetsPath, "svgs");
359386
for (const file of readdirSync(svgsPath)) {
360387
if (!file.endsWith(".svg")) continue;
361-
const geminiCaption = geminiCaptions[`svgs/${file}`];
362-
if (geminiCaption) {
363-
svgLines.push(`svgs/${file}${geminiCaption}`);
364-
continue;
365-
}
366388
const svgMatch = tokens.svgs.find(
367389
(s) =>
368390
s.label &&
@@ -373,9 +395,13 @@ export function generateAssetDescriptions(
373395
.slice(0, 15),
374396
),
375397
);
398+
const geminiCaption = geminiCaptions[`svgs/${file}`];
399+
if (geminiCaption) {
400+
svgLines.push(`svgs/${file}${geminiCaption}`);
401+
continue;
402+
}
376403
const label = svgMatch?.label || file.replace(".svg", "").replace(/-/g, " ");
377-
const isLogo = svgMatch?.isLogo || file.includes("logo");
378-
svgLines.push(`svgs/${file}${isLogo ? "logo: " : "icon: "}${label}`);
404+
svgLines.push(`svgs/${file}${label}`);
379405
}
380406
} catch {
381407
/* no svgs dir */

packages/cli/src/capture/index.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -579,14 +579,19 @@ export async function captureWebsite(
579579
const lines = generateAssetDescriptions(outputDir, tokens, catalogedAssets, geminiCaptions);
580580

581581
if (lines.length > 0) {
582+
const hasGeminiKey = !!(process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY);
583+
const header = hasGeminiKey
584+
? "# Asset Descriptions\n\nOne line per file. Read this instead of opening every image individually.\n\nTo find a specific brand or icon, **grep this file for the brand name in the description text** (e.g. `grep -i 'autodesk' asset-descriptions.md`). The Gemini Vision captions identify what's actually in each file — that's the agent's selector.\n\nThe `logo-<hash>.svg` filename prefix is a cheap structural hint (DOM said this SVG was inside a `<header>`, home-link `<a>`, or had an aria-label matching the page brand). It is NOT a content claim — many `logo-*` files are nav icons or decorative shapes. Trust the captions, not the filename prefix.\n\n"
585+
: "# Asset Descriptions\n\n⚠️ GEMINI_API_KEY not set — descriptions below are catalog-derived (alt text, headings, section context, filename) instead of Vision-generated. To get richer Vision descriptions on the next capture, set GEMINI_API_KEY (or GOOGLE_API_KEY) and re-run.\n\nThe `logo-<hash>.svg` filename prefix is a structural hint (DOM said this SVG was inside a `<header>`, home-link `<a>`, or had an aria-label matching the page brand). To pick the actual brand logo without Vision, open the `logo-*` candidates in a previewer or rasterize them with `sharp` before referencing — composing a fake logo ships off-brand in the final video.\n\n";
582586
writeFileSync(
583587
join(outputDir, "extracted", "asset-descriptions.md"),
584-
"# Asset Descriptions\n\nOne line per file. Read this instead of opening every image individually.\n\n" +
585-
lines.map((l) => "- " + l).join("\n") +
586-
"\n",
588+
header + lines.map((l) => "- " + l).join("\n") + "\n",
587589
"utf-8",
588590
);
589-
progress("design", `${lines.length} asset descriptions written`);
591+
progress(
592+
"design",
593+
`${lines.length} asset descriptions written${hasGeminiKey ? "" : " (no Gemini key — catalog-fallback mode)"}`,
594+
);
590595
}
591596
} catch {
592597
/* non-critical */

packages/cli/src/capture/tokenExtractor.ts

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,14 +353,42 @@ const EXTRACT_SCRIPT = `(() => {
353353
// Keep SVGs that have a label OR are at least 16px wide OR are inside a logo/brand context
354354
var inLogoContext = svg.closest('[class*="logo"], [class*="brand"], [class*="partner"], [class*="customer"], [class*="marquee"]') !== null;
355355
if (!label && !inLogoContext && (!w || parseInt(w) < 16)) return null;
356+
// Broadened isLogo: class-substring alone caught 0/32 on heygen.com.
357+
var isLogo = (label && label.toLowerCase().indexOf("logo") !== -1) ||
358+
svg.closest('[class*="logo"], [class*="brand"], [class*="home"], [class*="marquee"], [class*="partner"], [class*="customer"]') !== null;
359+
if (!isLogo) {
360+
var bannerEl = svg.closest('header, nav, [role="banner"]');
361+
if (bannerEl) {
362+
var firstSvg = bannerEl.querySelector('svg');
363+
if (firstSvg === svg) isLogo = true;
364+
}
365+
}
366+
if (!isLogo) {
367+
var anchor = svg.closest('a[href]');
368+
if (anchor) {
369+
var href = anchor.getAttribute('href') || '';
370+
if (href === '/' || href === '#' || href === './' ||
371+
/^https?:\\/\\/[^/]+\\/?$/.test(href)) {
372+
isLogo = true;
373+
}
374+
}
375+
}
376+
if (!isLogo) {
377+
var ariaLabel = svg.getAttribute('aria-label') || svg.getAttribute('title') || '';
378+
var titleBrand = (document.title || '').split(/[-|—]/)[0].trim();
379+
if (titleBrand.length > 1 && titleBrand.length < 30 &&
380+
ariaLabel.toLowerCase().indexOf(titleBrand.toLowerCase()) !== -1) {
381+
isLogo = true;
382+
}
383+
}
356384
var rect = svg.getBoundingClientRect();
357385
return {
358386
label: label || undefined,
359387
viewBox: svg.getAttribute("viewBox") || undefined,
360388
width: Math.round(rect.width),
361389
height: Math.round(rect.height),
362390
outerHTML: svg.outerHTML.slice(0, 10000),
363-
isLogo: (label && label.toLowerCase().indexOf("logo") !== -1) || svg.closest('[class*="logo"], [class*="brand"], [class*="home"], [class*="marquee"], [class*="partner"], [class*="customer"]') !== null
391+
isLogo: isLogo
364392
};
365393
}).filter(Boolean).slice(0, 50);
366394

packages/cli/src/cli.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ const subCommands = {
135135
validate: () => import("./commands/validate.js").then((m) => m.default),
136136
snapshot: () => import("./commands/snapshot.js").then((m) => m.default),
137137
capture: () => import("./commands/capture.js").then((m) => m.default),
138+
"capture-video": () => import("./commands/capture-video.js").then((m) => m.default),
138139
lambda: () => import("./commands/lambda.js").then((m) => m.default),
139140
cloudrun: () => import("./commands/cloudrun.js").then((m) => m.default),
140141
cloud: () => import("./commands/cloud.js").then((m) => m.default),

0 commit comments

Comments
 (0)