Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 124 additions & 18 deletions app/api/og/route.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ function wrapWords(text: string, maxChars: number): string[] {
return lines.length ? lines : [""];
}

/**
 * Break `text` into layout tokens.
 *
 * Whitespace separates tokens and is never emitted. Every CJK / full-width
 * character (as decided by `isCjkOrFullWidth`) is emitted as its own
 * one-character token so a line may wrap at any such character; any other
 * run of non-whitespace characters is kept together as a single token.
 *
 * @param text Raw text to split; iterated by code point.
 * @returns Tokens in source order; an empty array when `text` has no
 *   non-whitespace characters.
 */
function tokenize(text: string): string[] {
  const out: string[] = [];
  let pending = "";

  // Emit the accumulated non-CJK run, if there is one.
  const flush = (): void => {
    if (pending) {
      out.push(pending);
      pending = "";
    }
  };

  for (const glyph of text) {
    if (/\s/.test(glyph)) {
      flush();
    } else if (isCjkOrFullWidth(glyph.codePointAt(0)!)) {
      // A wide character ends the current run and stands alone.
      flush();
      out.push(glyph);
    } else {
      pending += glyph;
    }
  }

  flush();
  return out;
}

/**
 * ~average char width for F37 (panel / two-line checks), expressed as a
 * fraction of the font size: approxLineWidthPx multiplies a character count
 * by the font size and by this factor to estimate a line width in pixels.
 */
const ANALOG_CHAR_EM = 0.48;
/**
Expand Down Expand Up @@ -50,8 +75,49 @@ const TITLE_LONG_TITLE_FONT_SIZES = [120, 112, ...TITLE_FONT_SIZES];
// Candidate font sizes, listed from largest to smallest.
// NOTE(review): presumably tried in order when fitting the description text — confirm against the caller.
const DESC_FONT_SIZES = [26, 24, 22, 20, 18, 16, 14, 13, 12];
// NOTE(review): presumably the upper bound for the title font size during refinement — confirm against the caller.
const TITLE_MAX_REFINE_FS = 120;

/**
 * Count `line` in "average Latin character" units so that a width estimate of
 * `units * fontSize * em` also works for mixed-script text.
 *
 * A CJK / full-width character (per `isCjkOrFullWidth`) contributes `1 / em`
 * units, which makes it come out at one full em after the multiplication;
 * every other character contributes exactly one unit.
 *
 * @param line Text to measure; iterated by code point.
 * @param em Average Latin glyph width as a fraction of the font size.
 * @returns Total unit count (fractional when wide characters are present).
 */
function effectiveCharCount(line: string, em: number): number {
  const wideUnits = 1.0 / em;
  return Array.from(line).reduce(
    (total, glyph) =>
      total + (isCjkOrFullWidth(glyph.codePointAt(0)!) ? wideUnits : 1),
    0
  );
}

/**
 * Reports whether a code point falls in one of the CJK / full-width ranges
 * that the layout code treats as a wide character.
 *
 * The ranges are tested in ascending order: each `<` guard rejects the gap
 * below the next range and each `<=` guard accepts the range itself.
 *
 * @param cp Unicode code point to classify.
 * @returns `true` when `cp` lies inside any of the listed ranges.
 */
function isCjkOrFullWidth(cp: number): boolean {
  if (cp < 0x2e80) return false;
  // CJK radicals through unified ideographs. This single span also contains
  // CJK symbols and punctuation (0x3000–0x303f), Hiragana (0x3040–0x309f),
  // Katakana (0x30a0–0x30ff) and Katakana phonetic extensions (0x31f0–0x31ff).
  if (cp <= 0x9fff) return true;

  if (cp < 0xac00) return false;
  if (cp <= 0xd7af) return true; // Hangul syllables

  if (cp < 0xf900) return false;
  if (cp <= 0xfaff) return true; // CJK compatibility ideographs

  if (cp < 0xfe30) return false;
  if (cp <= 0xfe4f) return true; // CJK compatibility forms

  if (cp < 0xff01) return false;
  if (cp <= 0xff60) return true; // fullwidth forms

  if (cp < 0xffe0) return false;
  if (cp <= 0xffe6) return true; // fullwidth signs

  // CJK unified ideographs extension B onward, plus the compatibility supplement.
  return cp >= 0x20000 && cp <= 0x2fa1f;
}

/**
 * Reports whether `text` contains at least one character that
 * `isCjkOrFullWidth` classifies as CJK / full-width.
 *
 * @param text Text to scan; iterated by code point.
 * @returns `true` on the first matching character, `false` for text without
 *   one (including the empty string).
 */
function hasCjk(text: string): boolean {
  return Array.from(text).some((glyph) =>
    isCjkOrFullWidth(glyph.codePointAt(0)!)
  );
}

/**
 * Estimate the rendered width of `line` in pixels.
 *
 * The estimate is `effectiveCharCount(line, em) * fontSize * em`, so CJK /
 * full-width characters are measured at a full em instead of the Latin
 * average. The earlier `line.length`-based return was left in front of this
 * one, which made the CJK-aware computation unreachable.
 *
 * @param line Text of a single visual line.
 * @param fontSize Font size in pixels.
 * @param em Average Latin glyph width as a fraction of the font size.
 * @returns Approximate width in pixels.
 */
function approxLineWidthPx(line: string, fontSize: number, em: number): number {
  return effectiveCharCount(line, em) * fontSize * em;
}

/** Two lines using an approximate pixel budget (never wider than the panel). */
Expand All @@ -61,18 +127,25 @@ function splitTwoLinesByWidth(
innerW: number
): string[] | null {
const budget = innerW;
const words = title.trim().split(/\s+/).filter(Boolean);
const cjk = hasCjk(title);
const words = cjk
? tokenize(title)
: title.trim().split(/\s+/).filter(Boolean);
if (words.length <= 1) return null;
const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
let best: string[] | null = null;
let bestImbalance = Infinity;
for (let cut = 1; cut < words.length; cut++) {
const l1 = words.slice(0, cut).join(" ");
const l2 = words.slice(cut).join(" ");
const l1 = join(words.slice(0, cut));
const l2 = join(words.slice(cut));
if (
approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) <= budget &&
approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM) <= budget
) {
const imbalance = Math.abs(l1.length - l2.length);
const imbalance = Math.abs(
approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) -
approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM)
);
if (imbalance < bestImbalance) {
bestImbalance = imbalance;
best = [l1, l2];
Expand All @@ -83,13 +156,17 @@ function splitTwoLinesByWidth(
}

function splitTwoLines(title: string, maxCharsPerLine: number): string[] | null {
const words = title.trim().split(/\s+/).filter(Boolean);
const cjk = hasCjk(title);
const words = cjk
? tokenize(title)
: title.trim().split(/\s+/).filter(Boolean);
if (words.length <= 1) return null;
const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
let best: string[] | null = null;
let bestImbalance = Infinity;
for (let cut = 1; cut < words.length; cut++) {
const l1 = words.slice(0, cut).join(" ");
const l2 = words.slice(cut).join(" ");
const l1 = join(words.slice(0, cut));
const l2 = join(words.slice(cut));
if (l1.length <= maxCharsPerLine && l2.length <= maxCharsPerLine) {
const imbalance = Math.abs(l1.length - l2.length);
if (imbalance < bestImbalance) {
Expand Down Expand Up @@ -124,26 +201,49 @@ function titleTextBudgetWidthPx(innerW: number): number {
return Math.max(40, innerW - TITLE_SPAN_H_PADDING_X) * TITLE_TEXT_LINE_FRAC;
}

/**
 * Rebuild display text from layout tokens.
 *
 * Two neighbouring tokens that both contain CJK (per `hasCjk`) are
 * concatenated directly; every other pair of neighbours is separated by a
 * single space.
 *
 * @param tokens Tokens in display order.
 * @returns The joined text, or an empty string for an empty token list.
 */
function joinTokens(tokens: string[]): string {
  let joined = "";
  let previousIsCjk = false;

  for (const [index, token] of tokens.entries()) {
    const currentIsCjk = hasCjk(token);
    // The first token never gets a separator; later ones skip it only
    // when both sides of the boundary are CJK.
    if (index > 0 && !(previousIsCjk && currentIsCjk)) {
      joined += " ";
    }
    joined += token;
    previousIsCjk = currentIsCjk;
  }

  return joined;
}

/**
* Pack words into rows: each row is the longest prefix that still fits the text budget.
* This matches one yellow row = one visual line (no `wrapWords` char cap that then soft-wraps in Satori).
* Uses tokenize() for CJK-aware splitting so characters can wrap mid-"word".
*/
function greedyWordsToTitleRows(
title: string,
fontSize: number,
innerW: number
): string[] {
const words = title.trim().split(/\s+/).filter(Boolean);
if (words.length === 0) {
const tokens = hasCjk(title)
? tokenize(title)
: title.trim().split(/\s+/).filter(Boolean);
if (tokens.length === 0) {
return [""];
}
const join = hasCjk(title) ? joinTokens : (t: string[]) => t.join(" ");
const budget = titleTextBudgetWidthPx(innerW);
const rows: string[] = [];
let start = 0;
while (start < words.length) {
while (start < tokens.length) {
let end = start;
for (let j = start + 1; j <= words.length; j++) {
const candidate = words.slice(start, j).join(" ");
for (let j = start + 1; j <= tokens.length; j++) {
const candidate = join(tokens.slice(start, j));
if (
approxLineWidthPx(candidate, fontSize, TITLE_LONG_LINE_EM) *
TITLE_RENDER_SAFETY <=
Expand All @@ -155,10 +255,10 @@ function greedyWordsToTitleRows(
}
}
if (end === start) {
rows.push(words[start]);
rows.push(tokens[start]);
start += 1;
} else {
rows.push(words.slice(start, end).join(" "));
rows.push(join(tokens.slice(start, end)));
start = end;
}
}
Expand Down Expand Up @@ -237,7 +337,11 @@ function splitTitleIntoBalancedLines(
innerW: number,
targetLines: number
): string[] | null {
const words = title.trim().split(/\s+/).filter(Boolean);
const cjk = hasCjk(title);
const words = cjk
? tokenize(title)
: title.trim().split(/\s+/).filter(Boolean);
const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
const n = words.length;
if (targetLines < 1 || targetLines > n) return null;
const budget = titleTextBudgetWidthPx(innerW);
Expand All @@ -248,7 +352,7 @@ function splitTitleIntoBalancedLines(
for (let i = 0; i < n; i++) {
let line = "";
for (let j = i; j < n; j++) {
line = line ? `${line} ${words[j]}` : words[j];
line = join(words.slice(i, j + 1));
const w =
approxLineWidthPx(line, fontSize, TITLE_LONG_LINE_EM) *
TITLE_RENDER_SAFETY;
Expand Down Expand Up @@ -302,7 +406,7 @@ function splitTitleIntoBalancedLines(
for (let k = targetLines; k >= 1; k--) {
const start = prev[k][end];
if (start < 0) return null;
out.push(words.slice(start, end).join(" "));
out.push(join(words.slice(start, end)));
end = start;
}
out.reverse();
Expand Down Expand Up @@ -385,6 +489,7 @@ function fitTitleLayoutLongAtLineCount(
/**
 * Decides whether a title counts as "long".
 *
 * A trimmed title is long when any of these holds, checked in this order:
 * it has more than 105 UTF-16 code units; it contains CJK and its effective
 * character count (at `ANALOG_CHAR_EM`) exceeds 105; or it has more than
 * 14 whitespace-separated words.
 *
 * @param title Raw title text.
 * @returns `true` when the title is long by any of the criteria above.
 */
function isLongTitle(title: string): boolean {
  const trimmed = title.trim();

  const exceedsRawLength = trimmed.length > 105;
  if (exceedsRawLength) return true;

  // Wide characters take more room than their count suggests, so CJK titles
  // are additionally judged by their effective (Latin-equivalent) length.
  const exceedsEffectiveLength =
    hasCjk(trimmed) && effectiveCharCount(trimmed, ANALOG_CHAR_EM) > 105;
  if (exceedsEffectiveLength) return true;

  const wordCount = trimmed.split(/\s+/).filter(Boolean).length;
  return wordCount > 14;
}
Expand All @@ -393,6 +498,7 @@ function isLongTitle(title: string): boolean {
/**
 * Decides whether a title counts as "short".
 *
 * Empty (after trimming) titles and titles containing CJK are never short.
 * Otherwise a title is short when it has at most 3 whitespace-separated
 * words and at most 36 UTF-16 code units.
 *
 * @param title Raw title text.
 * @returns `true` only for non-empty, CJK-free titles within both limits.
 */
function isShortTitle(title: string): boolean {
  const trimmed = title.trim();
  if (trimmed === "" || hasCjk(trimmed)) {
    return false;
  }
  const wordCount = trimmed.split(/\s+/).filter(Boolean).length;
  return wordCount <= 3 && trimmed.length <= 36;
}
Expand Down
52 changes: 51 additions & 1 deletion lib/mdx-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,55 @@ export async function loadPage(
}
/* eslint-enable @typescript-eslint/no-explicit-any */

/**
 * Titles that are too generic to stand alone in an OG image (no parent context).
 * When one of these is the page title and there is a slug parent segment or
 * section title available, we enrich it automatically for the OG card.
 *
 * Entries must be lower-case and free of surrounding whitespace:
 * enrichOgTitle lower-cases and trims the page title before looking it up.
 */
const GENERIC_TITLES = new Set([
"overview",
"get started",
"concepts",
"core concepts",
"data model",
"troubleshooting and faq",
"troubleshooting & faq",
"mcp server",
]);

/**
 * Display spellings for slug words that should not simply be capitalized.
 * Keys are lower-case because slugSegmentToTitle lower-cases each word
 * before the lookup; values are used verbatim in the generated title.
 */
const SLUG_WORD_OVERRIDES: Record<string, string> = {
api: "API",
sdk: "SDK",
faq: "FAQ",
llm: "LLM",
mcp: "MCP",
ui: "UI",
};

/**
 * Turn one hyphen-separated slug segment into a display title.
 *
 * Each word is replaced by its entry in `SLUG_WORD_OVERRIDES` when one exists
 * (matched case-insensitively); otherwise its first character is upper-cased.
 * The words are then joined with single spaces.
 *
 * @param segment A single slug segment such as "api-reference".
 * @returns The title form, e.g. "API Reference".
 */
function slugSegmentToTitle(segment: string): string {
  return segment
    .split("-")
    .map((word) => {
      const key = word.toLowerCase();
      // Only own keys count as overrides. A bare `SLUG_WORD_OVERRIDES[key]`
      // also resolves inherited members (for example "constructor"), which
      // would put a function's source text into the title.
      const override = Object.prototype.hasOwnProperty.call(
        SLUG_WORD_OVERRIDES,
        key
      )
        ? SLUG_WORD_OVERRIDES[key]
        : undefined;
      return override ?? word.charAt(0).toUpperCase() + word.slice(1);
    })
    .join(" ");
}

/**
 * Add parent context to a generic page title for use on the OG card.
 *
 * Titles that are not in `GENERIC_TITLES` (compared lower-cased and trimmed)
 * are returned unchanged. For generic titles the context is chosen as:
 * the title-cased second-to-last slug segment when the slug has two or more
 * segments, "Langfuse" when the slug is empty, and `sectionTitle` otherwise.
 *
 * @param title Page title as authored.
 * @param slug Slug segments of the page, root first.
 * @param sectionTitle Title of the section the page belongs to.
 * @returns "Get Started with <context>" for the "get started" title,
 *   "<context> <title>" for other generic titles, or the original `title`
 *   when it is not generic or no non-empty context is available.
 */
function enrichOgTitle(title: string, slug: string[], sectionTitle: string): string {
  const trimmedTitle = title.trim();
  const lower = trimmedTitle.toLowerCase();
  if (!GENERIC_TITLES.has(lower)) return title;

  let context: string;
  if (slug.length >= 2) {
    context = slugSegmentToTitle(slug[slug.length - 2] ?? "");
  } else if (slug.length === 0) {
    context = "Langfuse";
  } else {
    context = sectionTitle;
  }

  // A blank context would yield a leading space or a dangling
  // "Get Started with ", so keep the plain title in that case.
  context = context.trim();
  if (!context) return title;

  if (lower === "get started") return `Get Started with ${context}`;
  // Use the trimmed title so stray whitespace cannot double the separator.
  return `${context} ${trimmedTitle}`;
}

/**
* Builds Next.js Metadata for a section page.
*
Expand All @@ -61,8 +110,9 @@ export function buildSectionMetadata(
const canonicalUrl =
pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath);
const seoTitle = pageData.seoTitle || page.data.title;
const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle);
const ogImage = buildOgImageUrl({
title: seoTitle,
title: ogTitle,
description: page.data.description,
section: sectionTitle,
staticOgImage: pageData.ogImage,
Expand Down
Loading