From e78d3a6d5dcb828f079380877be046a585d2e931 Mon Sep 17 00:00:00 2001 From: Cursor Agent <cursoragent@cursor.com> Date: Mon, 27 Apr 2026 21:06:57 +0000 Subject: [PATCH 1/4] Fix OG image issues: CJK line breaking and generic title enrichment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue 1: CJK characters (Japanese, Chinese, Korean) are full-width (~1em) but the width estimator treated them identically to Latin characters (~0.48em). This caused titles like 'Langfuse Cloud 日本リージョンを開始しました' to be crammed onto one line instead of wrapping properly. - Add isCjkOrFullWidth() and effectiveCharCount() for accurate width estimation - Add tokenize() to split CJK text at character boundaries (not just whitespace) - Add joinTokens() to reconstruct display text with proper spacing - Update all layout functions (splitTwoLinesByWidth, splitTwoLines, greedyWordsToTitleRows, splitTitleIntoBalancedLines) to use CJK-aware splitting - Prevent CJK titles from being classified as 'short' (single-line) Issue 2: Pages with generic frontmatter titles (Overview, Get Started, etc.) now get enriched OG titles using parent folder context from the URL slug. - 'Get Started' at /docs/prompt-management/get-started becomes 'Get Started with Prompt Management' - 'Overview' at /docs becomes 'Langfuse Overview' - 'Overview' at /docs/metrics/overview becomes 'Metrics Overview' - Pages with explicit seoTitle are unchanged Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com> --- app/api/og/route.tsx | 142 +++++++++++++++++++++++++++++++++++++------ lib/mdx-page.ts | 52 +++++++++++++++- 2 files changed, 175 insertions(+), 19 deletions(-) diff --git a/app/api/og/route.tsx b/app/api/og/route.tsx index 789c38c9e5..101daedbf6 100644 --- a/app/api/og/route.tsx +++ b/app/api/og/route.tsx @@ -21,6 +21,31 @@ function wrapWords(text: string, maxChars: number): string[] { return lines.length ? lines : [""]; } +/** + * Split a string into "tokens" that can be laid out independently. 
+ * Latin/space-delimited text stays as whole words; CJK characters become + * individual tokens so they can wrap at any character boundary. + */ +function tokenize(text: string): string[] { + const tokens: string[] = []; + let buf = ""; + for (const ch of text) { + const cp = ch.codePointAt(0)!; + if (/\s/.test(ch)) { + if (buf) { tokens.push(buf); buf = ""; } + continue; + } + if (isCjkOrFullWidth(cp)) { + if (buf) { tokens.push(buf); buf = ""; } + tokens.push(ch); + } else { + buf += ch; + } + } + if (buf) tokens.push(buf); + return tokens; +} + /** ~average char width for F37 (panel / two-line checks). */ const ANALOG_CHAR_EM = 0.48; /** @@ -50,8 +75,49 @@ const TITLE_LONG_TITLE_FONT_SIZES = [120, 112, ...TITLE_FONT_SIZES]; const DESC_FONT_SIZES = [26, 24, 22, 20, 18, 16, 14, 13, 12]; const TITLE_MAX_REFINE_FS = 120; +/** + * CJK and other full-width characters render at roughly 1em while Latin + * letters average around the given `em` fraction. Count effective character + * units so width estimation works for mixed-script titles (e.g. Japanese). 
+ */ +function effectiveCharCount(line: string, em: number): number { + let units = 0; + for (const ch of line) { + const cp = ch.codePointAt(0)!; + if (isCjkOrFullWidth(cp)) { + units += 1.0 / em; + } else { + units += 1; + } + } + return units; +} + +function isCjkOrFullWidth(cp: number): boolean { + return ( + (cp >= 0x2e80 && cp <= 0x9fff) || // CJK radicals, kangxi, ideographs + (cp >= 0xf900 && cp <= 0xfaff) || // CJK compatibility ideographs + (cp >= 0xfe30 && cp <= 0xfe4f) || // CJK compatibility forms + (cp >= 0xff01 && cp <= 0xff60) || // fullwidth Latin + halfwidth forms start + (cp >= 0xffe0 && cp <= 0xffe6) || // fullwidth signs + (cp >= 0x20000 && cp <= 0x2fa1f) || // CJK unified ext B–F, compat supplement + (cp >= 0x3000 && cp <= 0x303f) || // CJK symbols and punctuation + (cp >= 0x3040 && cp <= 0x309f) || // Hiragana + (cp >= 0x30a0 && cp <= 0x30ff) || // Katakana + (cp >= 0x31f0 && cp <= 0x31ff) || // Katakana phonetic extensions + (cp >= 0xac00 && cp <= 0xd7af) // Hangul syllables + ); +} + +function hasCjk(text: string): boolean { + for (const ch of text) { + if (isCjkOrFullWidth(ch.codePointAt(0)!)) return true; + } + return false; +} + function approxLineWidthPx(line: string, fontSize: number, em: number): number { - return line.length * fontSize * em; + return effectiveCharCount(line, em) * fontSize * em; } /** Two lines using an approximate pixel budget (never wider than the panel). */ @@ -61,18 +127,25 @@ function splitTwoLinesByWidth( innerW: number ): string[] | null { const budget = innerW; - const words = title.trim().split(/\s+/).filter(Boolean); + const cjk = hasCjk(title); + const words = cjk + ? tokenize(title) + : title.trim().split(/\s+/).filter(Boolean); if (words.length <= 1) return null; + const join = cjk ? 
joinTokens : (t: string[]) => t.join(" "); let best: string[] | null = null; let bestImbalance = Infinity; for (let cut = 1; cut < words.length; cut++) { - const l1 = words.slice(0, cut).join(" "); - const l2 = words.slice(cut).join(" "); + const l1 = join(words.slice(0, cut)); + const l2 = join(words.slice(cut)); if ( approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) <= budget && approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM) <= budget ) { - const imbalance = Math.abs(l1.length - l2.length); + const imbalance = Math.abs( + approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) - + approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM) + ); if (imbalance < bestImbalance) { bestImbalance = imbalance; best = [l1, l2]; @@ -83,13 +156,17 @@ function splitTwoLinesByWidth( } function splitTwoLines(title: string, maxCharsPerLine: number): string[] | null { - const words = title.trim().split(/\s+/).filter(Boolean); + const cjk = hasCjk(title); + const words = cjk + ? tokenize(title) + : title.trim().split(/\s+/).filter(Boolean); if (words.length <= 1) return null; + const join = cjk ? joinTokens : (t: string[]) => t.join(" "); let best: string[] | null = null; let bestImbalance = Infinity; for (let cut = 1; cut < words.length; cut++) { - const l1 = words.slice(0, cut).join(" "); - const l2 = words.slice(cut).join(" "); + const l1 = join(words.slice(0, cut)); + const l2 = join(words.slice(cut)); if (l1.length <= maxCharsPerLine && l2.length <= maxCharsPerLine) { const imbalance = Math.abs(l1.length - l2.length); if (imbalance < bestImbalance) { @@ -124,26 +201,49 @@ function titleTextBudgetWidthPx(innerW: number): number { return Math.max(40, innerW - TITLE_SPAN_H_PADDING_X) * TITLE_TEXT_LINE_FRAC; } +/** + * Join tokens back into display text: CJK tokens are adjacent without spaces; + * Latin tokens are separated by spaces. 
+ */ +function joinTokens(tokens: string[]): string { + if (tokens.length === 0) return ""; + let result = tokens[0]; + for (let i = 1; i < tokens.length; i++) { + const prevCjk = hasCjk(tokens[i - 1]); + const curCjk = hasCjk(tokens[i]); + if (prevCjk && curCjk) { + result += tokens[i]; + } else { + result += " " + tokens[i]; + } + } + return result; +} + /** * Pack words into rows: each row is the longest prefix that still fits the text budget. * This matches one yellow row = one visual line (no `wrapWords` char cap that then soft-wraps in Satori). + * Uses tokenize() for CJK-aware splitting so characters can wrap mid-"word". */ function greedyWordsToTitleRows( title: string, fontSize: number, innerW: number ): string[] { - const words = title.trim().split(/\s+/).filter(Boolean); - if (words.length === 0) { + const tokens = hasCjk(title) + ? tokenize(title) + : title.trim().split(/\s+/).filter(Boolean); + if (tokens.length === 0) { return [""]; } + const join = hasCjk(title) ? joinTokens : (t: string[]) => t.join(" "); const budget = titleTextBudgetWidthPx(innerW); const rows: string[] = []; let start = 0; - while (start < words.length) { + while (start < tokens.length) { let end = start; - for (let j = start + 1; j <= words.length; j++) { - const candidate = words.slice(start, j).join(" "); + for (let j = start + 1; j <= tokens.length; j++) { + const candidate = join(tokens.slice(start, j)); if ( approxLineWidthPx(candidate, fontSize, TITLE_LONG_LINE_EM) * TITLE_RENDER_SAFETY <= @@ -155,10 +255,10 @@ function greedyWordsToTitleRows( } } if (end === start) { - rows.push(words[start]); + rows.push(tokens[start]); start += 1; } else { - rows.push(words.slice(start, end).join(" ")); + rows.push(join(tokens.slice(start, end))); start = end; } } @@ -237,7 +337,11 @@ function splitTitleIntoBalancedLines( innerW: number, targetLines: number ): string[] | null { - const words = title.trim().split(/\s+/).filter(Boolean); + const cjk = hasCjk(title); + const words = cjk + 
? tokenize(title) + : title.trim().split(/\s+/).filter(Boolean); + const join = cjk ? joinTokens : (t: string[]) => t.join(" "); const n = words.length; if (targetLines < 1 || targetLines > n) return null; const budget = titleTextBudgetWidthPx(innerW); @@ -248,7 +352,7 @@ function splitTitleIntoBalancedLines( for (let i = 0; i < n; i++) { let line = ""; for (let j = i; j < n; j++) { - line = line ? `${line} ${words[j]}` : words[j]; + line = join(words.slice(i, j + 1)); const w = approxLineWidthPx(line, fontSize, TITLE_LONG_LINE_EM) * TITLE_RENDER_SAFETY; @@ -302,7 +406,7 @@ function splitTitleIntoBalancedLines( for (let k = targetLines; k >= 1; k--) { const start = prev[k][end]; if (start < 0) return null; - out.push(words.slice(start, end).join(" ")); + out.push(join(words.slice(start, end))); end = start; } out.reverse(); @@ -385,6 +489,7 @@ function fitTitleLayoutLongAtLineCount( function isLongTitle(title: string): boolean { const t = title.trim(); if (t.length > 105) return true; + if (hasCjk(t) && effectiveCharCount(t, ANALOG_CHAR_EM) > 105 / ANALOG_CHAR_EM) return true; const words = t.split(/\s+/).filter(Boolean); return words.length > 14; } @@ -393,6 +498,7 @@ function isLongTitle(title: string): boolean { function isShortTitle(title: string): boolean { const t = title.trim(); if (!t) return false; + if (hasCjk(t)) return false; const words = t.split(/\s+/).filter(Boolean); return words.length <= 3 && t.length <= 36; } diff --git a/lib/mdx-page.ts b/lib/mdx-page.ts index d04bcd8f97..27248a3636 100644 --- a/lib/mdx-page.ts +++ b/lib/mdx-page.ts @@ -40,6 +40,55 @@ export async function loadPage( } /* eslint-enable @typescript-eslint/no-explicit-any */ +/** + * Titles that are too generic to stand alone in an OG image (no parent context). + * When one of these is the page title and there is a slug parent segment or + * section title available, we enrich it automatically for the OG card. 
+ */ +const GENERIC_TITLES = new Set([ + "overview", + "get started", + "concepts", + "core concepts", + "data model", + "troubleshooting and faq", + "troubleshooting & faq", + "mcp server", +]); + +const SLUG_WORD_OVERRIDES: Record = { + api: "API", + sdk: "SDK", + faq: "FAQ", + llm: "LLM", + mcp: "MCP", + ui: "UI", +}; + +function slugSegmentToTitle(segment: string): string { + return segment + .split("-") + .map((w) => SLUG_WORD_OVERRIDES[w.toLowerCase()] ?? w.charAt(0).toUpperCase() + w.slice(1)) + .join(" "); +} + +function enrichOgTitle(title: string, slug: string[], sectionTitle: string): string { + const lower = title.toLowerCase().trim(); + if (!GENERIC_TITLES.has(lower)) return title; + + let context: string; + if (slug.length >= 2) { + context = slugSegmentToTitle(slug[slug.length - 2]); + } else if (slug.length === 0) { + context = "Langfuse"; + } else { + context = sectionTitle; + } + + if (lower === "get started") return `Get Started with ${context}`; + return `${context} ${title}`; +} + /** * Builds Next.js Metadata for a section page. * @@ -61,8 +110,9 @@ export function buildSectionMetadata( const canonicalUrl = pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath); const seoTitle = pageData.seoTitle || page.data.title; + const ogTitle = pageData.seoTitle ? 
seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle); const ogImage = buildOgImageUrl({ - title: seoTitle, + title: ogTitle, description: page.data.description, section: sectionTitle, staticOgImage: pageData.ogImage, From 3daab2afd750b16a10f1121c009c6ec3d442b04d Mon Sep 17 00:00:00 2001 From: Cursor Agent <cursoragent@cursor.com> Date: Tue, 28 Apr 2026 08:27:46 +0000 Subject: [PATCH 2/4] Apply enriched titles to SEO <title> tag too, not just OG image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generic frontmatter titles like 'Get Started' and 'Overview' now render enriched in both the OG image AND the page <title> tag: - 'Get Started - Langfuse' → 'Get Started with Prompt Management - Langfuse' - 'Overview - Langfuse' → 'Langfuse Overview - Langfuse' Pages with explicit seoTitle in frontmatter are unaffected. Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com> --- lib/mdx-page.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/mdx-page.ts b/lib/mdx-page.ts index 27248a3636..69187ff44b 100644 --- a/lib/mdx-page.ts +++ b/lib/mdx-page.ts @@ -109,10 +109,10 @@ export function buildSectionMetadata( const pagePath = `/${section}${slug.length > 0 ? `/${slug.join("/")}` : ""}`; const canonicalUrl = pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath); - const seoTitle = pageData.seoTitle || page.data.title; - const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle); + const rawTitle = pageData.seoTitle || page.data.title; + const seoTitle = pageData.seoTitle ? 
rawTitle : enrichOgTitle(rawTitle, slug, sectionTitle); const ogImage = buildOgImageUrl({ - title: ogTitle, + title: seoTitle, description: page.data.description, section: sectionTitle, staticOgImage: pageData.ogImage, From 4cea8b0c36e11a7303446ab13a0ce8af26502bfd Mon Sep 17 00:00:00 2001 From: Cursor Agent <cursoragent@cursor.com> Date: Tue, 28 Apr 2026 08:32:21 +0000 Subject: [PATCH 3/4] Revert "Apply enriched titles to SEO <title> tag too, not just OG image" This reverts commit 3daab2afd750b16a10f1121c009c6ec3d442b04d. --- lib/mdx-page.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/mdx-page.ts b/lib/mdx-page.ts index 69187ff44b..27248a3636 100644 --- a/lib/mdx-page.ts +++ b/lib/mdx-page.ts @@ -109,10 +109,10 @@ export function buildSectionMetadata( const pagePath = `/${section}${slug.length > 0 ? `/${slug.join("/")}` : ""}`; const canonicalUrl = pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath); - const rawTitle = pageData.seoTitle || page.data.title; - const seoTitle = pageData.seoTitle ? rawTitle : enrichOgTitle(rawTitle, slug, sectionTitle); + const seoTitle = pageData.seoTitle || page.data.title; + const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle); const ogImage = buildOgImageUrl({ - title: seoTitle, + title: ogTitle, description: page.data.description, section: sectionTitle, staticOgImage: pageData.ogImage, From 93b33484cff138b2d4d175d5a4107e2c9157cc2b Mon Sep 17 00:00:00 2001 From: Cursor Agent <cursoragent@cursor.com> Date: Tue, 28 Apr 2026 10:48:08 +0000 Subject: [PATCH 4/4] Fix isLongTitle CJK threshold: use 105 not 105/ANALOG_CHAR_EM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The effective char count threshold was 105/0.48 ≈ 219, which could never fire because the t.length > 105 guard above catches all strings that long. 
A 40-char CJK title (visually as wide as ~83 Latin chars) was never routed to fitTitleLayoutLong. Using 105 directly means 'this title has the visual weight of a 105-char Latin string.' Addresses PR review comment from greptile. Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com> --- app/api/og/route.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/og/route.tsx b/app/api/og/route.tsx index 101daedbf6..fe96ce1efb 100644 --- a/app/api/og/route.tsx +++ b/app/api/og/route.tsx @@ -489,7 +489,7 @@ function fitTitleLayoutLongAtLineCount( function isLongTitle(title: string): boolean { const t = title.trim(); if (t.length > 105) return true; - if (hasCjk(t) && effectiveCharCount(t, ANALOG_CHAR_EM) > 105 / ANALOG_CHAR_EM) return true; + if (hasCjk(t) && effectiveCharCount(t, ANALOG_CHAR_EM) > 105) return true; const words = t.split(/\s+/).filter(Boolean); return words.length > 14; }