Skip to content

Commit 5e7a7a8

Browse files
committed
fix(capture): address review feedback on font extractor
Five fixes from Copilot's inline review + Miguel's note on PR heygen-com#987: 1. inferWeightFromSubfamily — only matched concatenated forms ("extralight", "semibold"). Spaced ("Extra Light") and hyphenated ("Extra-Light") variants fell through to the 400 default, misreporting 200-weight fonts as 400. Now normalizes `[\s-]+` out of the subfamily before matching. 2. meta.tool — was hardcoded to "fontkit@2.0.4" but `packages/cli/package.json` allows ^2.0.4, so the manifest string would drift on every dep bump. Now records just "fontkit"; the version moves with the dep and can be discovered from package.json at debug-time if needed. 3. FontFileMetadata.rawFamily — docstring said "nameID 16 preferred, then nameID 1" but the code also derives from PostScript via deriveFamilyFromPostscript when both name-table fields are missing. Doc now reflects the actual three-step precedence. 4. FontFileMetadata.weight — docstring said "100-900" but the code emits 0 (when identified: false) and 950 (when canonicalizeFamily picks ExtraBlack/UltraBlack). Doc now documents both edge values explicitly. 5. sharp ^0.34.5 — bumped from ^0.34.0 on this PR but font extraction doesn't use sharp; the bump is needed by the contact sheet code in PR heygen-com#988. Reverted on heygen-com#987; will re-bump on heygen-com#988 where it's actually consumed. Also adds vitest coverage: - 34 tests in fontMetadataExtractor.test.ts - Covers inferWeightFromSubfamily for concatenated, spaced, and hyphenated forms (including composite styles like "Bold Italic" and case-insensitivity) - Covers canonicalizeFamily for unchanged families, stripped weight tokens, preserved width modifiers, and the 950 emit - Integration tests for extractFontMetadata (non-existent dir, empty dir) verifying the meta.tool / generatedAt shape Exported `inferWeightFromSubfamily` and `canonicalizeFamily` for testing. Pure functions, internal helpers, but exporting is the clean way to pin their behavior against regressions.
1 parent db94b50 commit 5e7a7a8

3 files changed

Lines changed: 215 additions & 8 deletions

File tree

packages/cli/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"postcss": "^8.5.8",
3939
"prettier": "^3.8.1",
4040
"puppeteer-core": "^24.39.1",
41-
"sharp": "^0.34.5"
41+
"sharp": "^0.34.0"
4242
},
4343
"devDependencies": {
4444
"@clack/prompts": "^1.1.0",
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import { describe, expect, it } from "vitest";
2+
import { mkdtempSync, rmSync, existsSync, readFileSync } from "node:fs";
3+
import { tmpdir } from "node:os";
4+
import { join } from "node:path";
5+
import {
6+
canonicalizeFamily,
7+
extractFontMetadata,
8+
inferWeightFromSubfamily,
9+
} from "./fontMetadataExtractor.js";
10+
11+
describe("inferWeightFromSubfamily", () => {
12+
// The concatenated forms were always handled. The spaced and hyphenated
13+
// forms were the bug Copilot flagged on PR #987 — "Extra Light" used to
14+
// fall through to the 400 default before the whitespace-normalization fix.
15+
describe("concatenated forms (already handled)", () => {
16+
it.each([
17+
["Thin", 100],
18+
["ExtraLight", 200],
19+
["UltraLight", 200],
20+
["Light", 300],
21+
["Regular", 400],
22+
["Medium", 500],
23+
["SemiBold", 600],
24+
["DemiBold", 600],
25+
["Bold", 700],
26+
["ExtraBold", 800],
27+
["UltraBold", 800],
28+
["Black", 900],
29+
["Heavy", 900],
30+
])("%s → %d", (subfamily, expected) => {
31+
expect(inferWeightFromSubfamily(subfamily)).toBe(expected);
32+
});
33+
});
34+
35+
describe("spaced forms (the bug fix)", () => {
36+
it.each([
37+
["Extra Light", 200],
38+
["Ultra Light", 200],
39+
["Semi Bold", 600],
40+
["Demi Bold", 600],
41+
["Extra Bold", 800],
42+
["Ultra Bold", 800],
43+
])("%s → %d", (subfamily, expected) => {
44+
expect(inferWeightFromSubfamily(subfamily)).toBe(expected);
45+
});
46+
});
47+
48+
describe("hyphenated forms (the bug fix)", () => {
49+
it.each([
50+
["Extra-Light", 200],
51+
["Semi-Bold", 600],
52+
["Extra-Bold", 800],
53+
])("%s → %d", (subfamily, expected) => {
54+
expect(inferWeightFromSubfamily(subfamily)).toBe(expected);
55+
});
56+
});
57+
58+
describe("composite styles", () => {
59+
it("Bold Italic still detects Bold", () => {
60+
expect(inferWeightFromSubfamily("Bold Italic")).toBe(700);
61+
});
62+
it("Semi Bold Italic still detects SemiBold (priority over Bold)", () => {
63+
expect(inferWeightFromSubfamily("Semi Bold Italic")).toBe(600);
64+
});
65+
it("ExtraBold Italic still detects ExtraBold (priority over Bold)", () => {
66+
expect(inferWeightFromSubfamily("ExtraBold Italic")).toBe(800);
67+
});
68+
});
69+
70+
it("unknown subfamily falls back to 400 (Regular)", () => {
71+
expect(inferWeightFromSubfamily("Headline")).toBe(400);
72+
expect(inferWeightFromSubfamily("")).toBe(400);
73+
expect(inferWeightFromSubfamily("Some Random Style")).toBe(400);
74+
});
75+
76+
it("is case-insensitive", () => {
77+
expect(inferWeightFromSubfamily("EXTRA LIGHT")).toBe(200);
78+
expect(inferWeightFromSubfamily("extra light")).toBe(200);
79+
expect(inferWeightFromSubfamily("ExTrA LiGhT")).toBe(200);
80+
});
81+
});
82+
83+
describe("canonicalizeFamily", () => {
84+
it("returns family unchanged when no weight token is trailing", () => {
85+
expect(canonicalizeFamily("Inter")).toEqual({
86+
canonical: "Inter",
87+
inferredWeight: null,
88+
});
89+
expect(canonicalizeFamily("Tiempos Headline")).toEqual({
90+
canonical: "Tiempos Headline",
91+
inferredWeight: null,
92+
});
93+
expect(canonicalizeFamily("Söhne Breit")).toEqual({
94+
canonical: "Söhne Breit",
95+
inferredWeight: null,
96+
});
97+
});
98+
99+
it("strips trailing weight tokens and surfaces the implied weight", () => {
100+
expect(canonicalizeFamily("Inter Medium")).toEqual({
101+
canonical: "Inter",
102+
inferredWeight: 500,
103+
});
104+
expect(canonicalizeFamily("Inter Light")).toEqual({
105+
canonical: "Inter",
106+
inferredWeight: 300,
107+
});
108+
expect(canonicalizeFamily("Inter Bold")).toEqual({
109+
canonical: "Inter",
110+
inferredWeight: 700,
111+
});
112+
expect(canonicalizeFamily("Funnel Display Light")).toEqual({
113+
canonical: "Funnel Display",
114+
inferredWeight: 300,
115+
});
116+
});
117+
118+
it("preserves width modifiers before the weight token", () => {
119+
expect(canonicalizeFamily("Inter Tight Medium")).toEqual({
120+
canonical: "Inter Tight",
121+
inferredWeight: 500,
122+
});
123+
});
124+
125+
it("emits 950 for ExtraBlack / UltraBlack (mirrors foundry intent)", () => {
126+
expect(canonicalizeFamily("Inter ExtraBlack")).toEqual({
127+
canonical: "Inter",
128+
inferredWeight: 950,
129+
});
130+
});
131+
132+
it("returns empty input unchanged", () => {
133+
expect(canonicalizeFamily("")).toEqual({
134+
canonical: "",
135+
inferredWeight: null,
136+
});
137+
});
138+
});
139+
140+
describe("extractFontMetadata", () => {
141+
// Light integration tests against the public surface — uses a real
142+
// temp directory and verifies the manifest shape. Doesn't require
143+
// fixture font binaries; the non-existent and empty-directory cases
144+
// exercise the happy paths for the surrounding pipeline.
145+
146+
it("returns an empty manifest when the fonts directory doesn't exist", () => {
147+
const tmp = mkdtempSync(join(tmpdir(), "hf-font-test-"));
148+
try {
149+
const outputPath = join(tmp, "manifest.json");
150+
const manifest = extractFontMetadata(join(tmp, "does-not-exist"), outputPath);
151+
expect(manifest.files).toEqual([]);
152+
expect(manifest.families).toEqual([]);
153+
expect(manifest.unidentified).toEqual([]);
154+
expect(existsSync(outputPath)).toBe(true);
155+
const written = JSON.parse(readFileSync(outputPath, "utf-8")) as typeof manifest;
156+
expect(written.files).toEqual([]);
157+
expect(written.meta.tool).toBe("fontkit");
158+
expect(typeof written.meta.generatedAt).toBe("string");
159+
} finally {
160+
rmSync(tmp, { recursive: true, force: true });
161+
}
162+
});
163+
164+
it("writes a manifest with the documented meta shape", () => {
165+
const tmp = mkdtempSync(join(tmpdir(), "hf-font-test-"));
166+
try {
167+
const outputPath = join(tmp, "manifest.json");
168+
const manifest = extractFontMetadata(tmp, outputPath);
169+
expect(manifest.meta.tool).toBe("fontkit"); // no version hardcoded — moves with the dep
170+
// generatedAt is an ISO string
171+
expect(manifest.meta.generatedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
172+
} finally {
173+
rmSync(tmp, { recursive: true, force: true });
174+
}
175+
});
176+
});

packages/cli/src/capture/fontMetadataExtractor.ts

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,30 @@ export interface FontFileMetadata {
3636
* typographic family aggregate cleanly. See rawFamily for the unmodified value.
3737
*/
3838
family: string;
39-
/** Raw family name from the OpenType name table (nameID 16 preferred, then nameID 1). Empty if unidentifiable. */
39+
/**
40+
* Raw family name as extracted, before canonicalization. Source precedence:
41+
* 1. OpenType `name` table (nameID 16 if present, else nameID 1)
42+
* 2. Fallback: derived from the PostScript name (nameID 6) before the first
43+
* `-` (e.g. PostScript "Inter-Regular" → "Inter")
44+
* Empty string when both the name table and PostScript name are absent
45+
* (i.e. when `identified` is false).
46+
*/
4047
rawFamily: string;
4148
/** Subfamily / style name from nameID 17 or 2 (e.g. "Regular", "Bold Italic") */
4249
subfamily: string;
4350
/** PostScript name from nameID 6 (e.g. "Inter-Regular") */
4451
postscript: string;
45-
/** OS/2 usWeightClass (100–900). Approximate for variable fonts — see variationAxes. */
52+
/**
53+
* Weight value. Typically the OS/2 `usWeightClass` (100–900) when present.
54+
* Other values you may see:
55+
* - `0`: returned when the file is `identified: false` (no name-table data
56+
* to infer from); treat as unknown.
57+
* - `950`: emitted by the family-name canonicalization when a foundry
58+
* packaged "ExtraBlack" or "UltraBlack" as its own family. This is
59+
* outside the 100-900 standard range but mirrors the foundry intent.
60+
* For variable fonts, this is the file's default axis position — see
61+
* `variationAxes` for the available `wght` range.
62+
*/
4663
weight: number;
4764
/** "normal" or "italic" — derived from subfamily and OS/2 fsSelection */
4865
style: "normal" | "italic";
@@ -110,7 +127,9 @@ export function extractFontMetadata(fontsDir: string, outputPath: string): Fonts
110127
unidentified,
111128
meta: {
112129
generatedAt: new Date().toISOString(),
113-
tool: "fontkit@2.0.4",
130+
// Record just the tool name; the version moves with the dep and would
131+
// otherwise drift from a hardcoded string on every fontkit bump.
132+
tool: "fontkit",
114133
},
115134
};
116135

@@ -206,10 +225,21 @@ function deriveFamilyFromPostscript(postscript: string): string {
206225
return (dashIdx > 0 ? postscript.slice(0, dashIdx) : postscript).trim();
207226
}
208227

209-
/** Fallback when OS/2 table is missing — guess weight from "Bold", "Light", etc. */
228+
/**
229+
* Fallback when OS/2 table is missing — guess weight from "Bold", "Light", etc.
230+
*
231+
* Normalizes spaces and hyphens out of the subfamily before matching so that
232+
* fonts using spaced names ("Extra Light", "Semi Bold") or hyphenated names
233+
* ("Extra-Light", "Semi-Bold") resolve to the same weight as the concatenated
234+
* forms ("ExtraLight", "SemiBold"). Without this, a font subfamily of
235+
* "Extra Light" would fall through every concat check and end at the 400
236+
* default, misreporting a 200-weight font as 400.
237+
*
238+
* Exported for unit testing.
239+
*/
210240
// fallow-ignore-next-line complexity
211-
function inferWeightFromSubfamily(subfamily: string): number {
212-
const s = subfamily.toLowerCase();
241+
export function inferWeightFromSubfamily(subfamily: string): number {
242+
const s = subfamily.toLowerCase().replace(/[\s-]+/g, "");
213243
if (s.includes("thin")) return 100;
214244
if (s.includes("extralight") || s.includes("ultralight")) return 200;
215245
if (s.includes("light")) return 300;
@@ -273,8 +303,9 @@ const WEIGHT_TOKEN_RE = new RegExp(`\\s+(${Object.keys(WEIGHT_TOKEN_TO_VALUE).jo
273303
* italic flag is recovered separately from the OS/2 fsSelection bit, so no
274304
* information is lost.
275305
*/
306+
// Exported for unit testing.
276307
// fallow-ignore-next-line complexity
277-
function canonicalizeFamily(family: string): {
308+
export function canonicalizeFamily(family: string): {
278309
canonical: string;
279310
inferredWeight: number | null;
280311
} {

0 commit comments

Comments
 (0)