From e78d3a6d5dcb828f079380877be046a585d2e931 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 27 Apr 2026 21:06:57 +0000
Subject: [PATCH 1/4] Fix OG image issues: CJK line breaking and generic title
 enrichment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue 1: CJK characters (Japanese, Chinese, Korean) are full-width (~1em)
but the width estimator treated them identically to Latin characters (~0.48em).
This caused titles like 'Langfuse Cloud 日本リージョンを開始しました' to be
crammed onto one line instead of wrapping properly.

- Add isCjkOrFullWidth() and effectiveCharCount() for accurate width estimation
- Add tokenize() to split CJK text at character boundaries (not just whitespace)
- Add joinTokens() to reconstruct display text with proper spacing
- Update all layout functions (splitTwoLinesByWidth, splitTwoLines,
  greedyWordsToTitleRows, splitTitleIntoBalancedLines) to use CJK-aware splitting
- Prevent CJK titles from being classified as 'short' (single-line)

Issue 2: Pages with generic frontmatter titles (Overview, Get Started, etc.)
now get enriched OG titles using parent folder context from the URL slug.

- 'Get Started' at /docs/prompt-management/get-started becomes
  'Get Started with Prompt Management'
- 'Overview' at /docs becomes 'Langfuse Overview'
- 'Overview' at /docs/metrics/overview becomes 'Metrics Overview'
- Pages with explicit seoTitle are unchanged

Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com>
---
 app/api/og/route.tsx | 142 +++++++++++++++++++++++++++++++++++++------
 lib/mdx-page.ts      |  52 +++++++++++++++-
 2 files changed, 175 insertions(+), 19 deletions(-)

diff --git a/app/api/og/route.tsx b/app/api/og/route.tsx
index 789c38c9e5..101daedbf6 100644
--- a/app/api/og/route.tsx
+++ b/app/api/og/route.tsx
@@ -21,6 +21,31 @@ function wrapWords(text: string, maxChars: number): string[] {
   return lines.length ? lines : [""];
 }
 
+/**
+ * Split a string into "tokens" that can be laid out independently.
+ * Latin/space-delimited text stays as whole words; CJK characters become
+ * individual tokens so they can wrap at any character boundary.
+ */
+function tokenize(text: string): string[] {
+  const tokens: string[] = [];
+  let buf = "";
+  for (const ch of text) {
+    const cp = ch.codePointAt(0)!;
+    if (/\s/.test(ch)) {
+      if (buf) { tokens.push(buf); buf = ""; }
+      continue;
+    }
+    if (isCjkOrFullWidth(cp)) {
+      if (buf) { tokens.push(buf); buf = ""; }
+      tokens.push(ch);
+    } else {
+      buf += ch;
+    }
+  }
+  if (buf) tokens.push(buf);
+  return tokens;
+}
+
 /** ~average char width for F37 (panel / two-line checks). */
 const ANALOG_CHAR_EM = 0.48;
 /**
@@ -50,8 +75,49 @@ const TITLE_LONG_TITLE_FONT_SIZES = [120, 112, ...TITLE_FONT_SIZES];
 const DESC_FONT_SIZES = [26, 24, 22, 20, 18, 16, 14, 13, 12];
 const TITLE_MAX_REFINE_FS = 120;
 
+/**
+ * CJK and other full-width characters render at roughly 1em while Latin
+ * letters average around the given `em` fraction.  Count each Latin character
+ * as 1 unit and each full-width one as 1/em units, so units * em is width in em.
+ */
+function effectiveCharCount(line: string, em: number): number {
+  let units = 0;
+  for (const ch of line) {
+    const cp = ch.codePointAt(0)!;
+    if (isCjkOrFullWidth(cp)) {
+      units += 1.0 / em;
+    } else {
+      units += 1;
+    }
+  }
+  return units;
+}
+
+function isCjkOrFullWidth(cp: number): boolean {
+  return (
+    (cp >= 0x2e80 && cp <= 0x9fff) ||  // CJK radicals through unified ideographs (also spans kana and CJK punctuation)
+    (cp >= 0xf900 && cp <= 0xfaff) ||  // CJK compatibility ideographs
+    (cp >= 0xfe30 && cp <= 0xfe4f) ||  // CJK compatibility forms
+    (cp >= 0xff01 && cp <= 0xff60) ||  // fullwidth ASCII variants (halfwidth forms begin at U+FF61)
+    (cp >= 0xffe0 && cp <= 0xffe6) ||  // fullwidth signs
+    (cp >= 0x20000 && cp <= 0x2fa1f) || // CJK unified ext B–F, compat supplement
+    (cp >= 0x3000 && cp <= 0x303f) ||  // CJK symbols and punctuation
+    (cp >= 0x3040 && cp <= 0x309f) ||  // Hiragana
+    (cp >= 0x30a0 && cp <= 0x30ff) ||  // Katakana
+    (cp >= 0x31f0 && cp <= 0x31ff) ||  // Katakana phonetic extensions
+    (cp >= 0xac00 && cp <= 0xd7af)     // Hangul syllables
+  );
+}
+
+function hasCjk(text: string): boolean {
+  for (const ch of text) {
+    if (isCjkOrFullWidth(ch.codePointAt(0)!)) return true;
+  }
+  return false;
+}
+
 function approxLineWidthPx(line: string, fontSize: number, em: number): number {
-  return line.length * fontSize * em;
+  return effectiveCharCount(line, em) * fontSize * em;
 }
 
 /** Two lines using an approximate pixel budget (never wider than the panel). */
@@ -61,18 +127,25 @@ function splitTwoLinesByWidth(
   innerW: number
 ): string[] | null {
   const budget = innerW;
-  const words = title.trim().split(/\s+/).filter(Boolean);
+  const cjk = hasCjk(title);
+  const words = cjk
+    ? tokenize(title)
+    : title.trim().split(/\s+/).filter(Boolean);
   if (words.length <= 1) return null;
+  const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
   let best: string[] | null = null;
   let bestImbalance = Infinity;
   for (let cut = 1; cut < words.length; cut++) {
-    const l1 = words.slice(0, cut).join(" ");
-    const l2 = words.slice(cut).join(" ");
+    const l1 = join(words.slice(0, cut));
+    const l2 = join(words.slice(cut));
     if (
       approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) <= budget &&
       approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM) <= budget
     ) {
-      const imbalance = Math.abs(l1.length - l2.length);
+      const imbalance = Math.abs(
+        approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) -
+        approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM)
+      );
       if (imbalance < bestImbalance) {
         bestImbalance = imbalance;
         best = [l1, l2];
@@ -83,13 +156,17 @@ function splitTwoLinesByWidth(
 }
 
 function splitTwoLines(title: string, maxCharsPerLine: number): string[] | null {
-  const words = title.trim().split(/\s+/).filter(Boolean);
+  const cjk = hasCjk(title);
+  const words = cjk
+    ? tokenize(title)
+    : title.trim().split(/\s+/).filter(Boolean);
   if (words.length <= 1) return null;
+  const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
   let best: string[] | null = null;
   let bestImbalance = Infinity;
   for (let cut = 1; cut < words.length; cut++) {
-    const l1 = words.slice(0, cut).join(" ");
-    const l2 = words.slice(cut).join(" ");
+    const l1 = join(words.slice(0, cut));
+    const l2 = join(words.slice(cut));
     if (l1.length <= maxCharsPerLine && l2.length <= maxCharsPerLine) {
       const imbalance = Math.abs(l1.length - l2.length);
       if (imbalance < bestImbalance) {
@@ -124,26 +201,49 @@ function titleTextBudgetWidthPx(innerW: number): number {
   return Math.max(40, innerW - TITLE_SPAN_H_PADDING_X) * TITLE_TEXT_LINE_FRAC;
 }
 
+/**
+ * Join tokens back into display text: two adjacent CJK tokens are concatenated
+ * directly; every other boundary (Latin–Latin or Latin–CJK) gets a single space.
+ */
+function joinTokens(tokens: string[]): string {
+  if (tokens.length === 0) return "";
+  let result = tokens[0];
+  for (let i = 1; i < tokens.length; i++) {
+    const prevCjk = hasCjk(tokens[i - 1]);
+    const curCjk = hasCjk(tokens[i]);
+    if (prevCjk && curCjk) {
+      result += tokens[i];
+    } else {
+      result += " " + tokens[i];
+    }
+  }
+  return result;
+}
+
 /**
  * Pack words into rows: each row is the longest prefix that still fits the text budget.
  * This matches one yellow row = one visual line (no `wrapWords` char cap that then soft-wraps in Satori).
+ * Uses tokenize() for CJK-aware splitting so characters can wrap mid-"word".
  */
 function greedyWordsToTitleRows(
   title: string,
   fontSize: number,
   innerW: number
 ): string[] {
-  const words = title.trim().split(/\s+/).filter(Boolean);
-  if (words.length === 0) {
+  const tokens = hasCjk(title)
+    ? tokenize(title)
+    : title.trim().split(/\s+/).filter(Boolean);
+  if (tokens.length === 0) {
     return [""];
   }
+  const join = hasCjk(title) ? joinTokens : (t: string[]) => t.join(" ");
   const budget = titleTextBudgetWidthPx(innerW);
   const rows: string[] = [];
   let start = 0;
-  while (start < words.length) {
+  while (start < tokens.length) {
     let end = start;
-    for (let j = start + 1; j <= words.length; j++) {
-      const candidate = words.slice(start, j).join(" ");
+    for (let j = start + 1; j <= tokens.length; j++) {
+      const candidate = join(tokens.slice(start, j));
       if (
         approxLineWidthPx(candidate, fontSize, TITLE_LONG_LINE_EM) *
           TITLE_RENDER_SAFETY <=
@@ -155,10 +255,10 @@ function greedyWordsToTitleRows(
       }
     }
     if (end === start) {
-      rows.push(words[start]);
+      rows.push(tokens[start]);
       start += 1;
     } else {
-      rows.push(words.slice(start, end).join(" "));
+      rows.push(join(tokens.slice(start, end)));
       start = end;
     }
   }
@@ -237,7 +337,11 @@ function splitTitleIntoBalancedLines(
   innerW: number,
   targetLines: number
 ): string[] | null {
-  const words = title.trim().split(/\s+/).filter(Boolean);
+  const cjk = hasCjk(title);
+  const words = cjk
+    ? tokenize(title)
+    : title.trim().split(/\s+/).filter(Boolean);
+  const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
   const n = words.length;
   if (targetLines < 1 || targetLines > n) return null;
   const budget = titleTextBudgetWidthPx(innerW);
@@ -248,7 +352,7 @@ function splitTitleIntoBalancedLines(
   for (let i = 0; i < n; i++) {
     let line = "";
     for (let j = i; j < n; j++) {
-      line = line ? `${line} ${words[j]}` : words[j];
+      line = join(words.slice(i, j + 1));
       const w =
         approxLineWidthPx(line, fontSize, TITLE_LONG_LINE_EM) *
         TITLE_RENDER_SAFETY;
@@ -302,7 +406,7 @@ function splitTitleIntoBalancedLines(
   for (let k = targetLines; k >= 1; k--) {
     const start = prev[k][end];
     if (start < 0) return null;
-    out.push(words.slice(start, end).join(" "));
+    out.push(join(words.slice(start, end)));
     end = start;
   }
   out.reverse();
@@ -385,6 +489,7 @@ function fitTitleLayoutLongAtLineCount(
 function isLongTitle(title: string): boolean {
   const t = title.trim();
   if (t.length > 105) return true;
+  if (hasCjk(t) && effectiveCharCount(t, ANALOG_CHAR_EM) > 105 / ANALOG_CHAR_EM) return true;
   const words = t.split(/\s+/).filter(Boolean);
   return words.length > 14;
 }
@@ -393,6 +498,7 @@ function isLongTitle(title: string): boolean {
 function isShortTitle(title: string): boolean {
   const t = title.trim();
   if (!t) return false;
+  if (hasCjk(t)) return false;
   const words = t.split(/\s+/).filter(Boolean);
   return words.length <= 3 && t.length <= 36;
 }
diff --git a/lib/mdx-page.ts b/lib/mdx-page.ts
index d04bcd8f97..27248a3636 100644
--- a/lib/mdx-page.ts
+++ b/lib/mdx-page.ts
@@ -40,6 +40,55 @@ export async function loadPage(
 }
 /* eslint-enable @typescript-eslint/no-explicit-any */
 
+/**
+ * Titles that are too generic to stand alone in an OG image (no parent context).
+ * Entries are lowercase because lookup uses the lowercased, trimmed page title.
+ * A match is enriched with the parent slug segment, "Langfuse", or the section title.
+ */
+const GENERIC_TITLES = new Set([
+  "overview",
+  "get started",
+  "concepts",
+  "core concepts",
+  "data model",
+  "troubleshooting and faq",
+  "troubleshooting & faq",
+  "mcp server",
+]);
+
+const SLUG_WORD_OVERRIDES: Record<string, string> = {
+  api: "API",
+  sdk: "SDK",
+  faq: "FAQ",
+  llm: "LLM",
+  mcp: "MCP",
+  ui: "UI",
+};
+
+function slugSegmentToTitle(segment: string): string {
+  return segment
+    .split("-")
+    .map((w) => SLUG_WORD_OVERRIDES[w.toLowerCase()] ?? w.charAt(0).toUpperCase() + w.slice(1))
+    .join(" ");
+}
+
+function enrichOgTitle(title: string, slug: string[], sectionTitle: string): string {
+  const lower = title.toLowerCase().trim();
+  if (!GENERIC_TITLES.has(lower)) return title;
+
+  let context: string;
+  if (slug.length >= 2) {
+    context = slugSegmentToTitle(slug[slug.length - 2]);
+  } else if (slug.length === 0) {
+    context = "Langfuse";
+  } else {
+    context = sectionTitle;
+  }
+
+  if (lower === "get started") return `Get Started with ${context}`;
+  return `${context} ${title}`;
+}
+
 /**
  * Builds Next.js Metadata for a section page.
  *
@@ -61,8 +110,9 @@ export function buildSectionMetadata(
   const canonicalUrl =
     pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath);
   const seoTitle = pageData.seoTitle || page.data.title;
+  const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle);
   const ogImage = buildOgImageUrl({
-    title: seoTitle,
+    title: ogTitle,
     description: page.data.description,
     section: sectionTitle,
     staticOgImage: pageData.ogImage,

From 3daab2afd750b16a10f1121c009c6ec3d442b04d Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 28 Apr 2026 08:27:46 +0000
Subject: [PATCH 2/4] Apply enriched titles to SEO <title> tag too, not just OG
 image
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Generic frontmatter titles like 'Get Started' and 'Overview' now render
enriched in both the OG image AND the page <title> tag:
- 'Get Started - Langfuse' → 'Get Started with Prompt Management - Langfuse'
- 'Overview - Langfuse' → 'Langfuse Overview - Langfuse'

Pages with explicit seoTitle in frontmatter are unaffected.

Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com>
---
 lib/mdx-page.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/mdx-page.ts b/lib/mdx-page.ts
index 27248a3636..69187ff44b 100644
--- a/lib/mdx-page.ts
+++ b/lib/mdx-page.ts
@@ -109,10 +109,10 @@ export function buildSectionMetadata(
   const pagePath = `/${section}${slug.length > 0 ? `/${slug.join("/")}` : ""}`;
   const canonicalUrl =
     pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath);
-  const seoTitle = pageData.seoTitle || page.data.title;
-  const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle);
+  const rawTitle = pageData.seoTitle || page.data.title;
+  const seoTitle = pageData.seoTitle ? rawTitle : enrichOgTitle(rawTitle, slug, sectionTitle);
   const ogImage = buildOgImageUrl({
-    title: ogTitle,
+    title: seoTitle,
     description: page.data.description,
     section: sectionTitle,
     staticOgImage: pageData.ogImage,

From 4cea8b0c36e11a7303446ab13a0ce8af26502bfd Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 28 Apr 2026 08:32:21 +0000
Subject: [PATCH 3/4] Revert "Apply enriched titles to SEO <title> tag too, not
 just OG image"

This reverts commit 3daab2afd750b16a10f1121c009c6ec3d442b04d.
---
 lib/mdx-page.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/mdx-page.ts b/lib/mdx-page.ts
index 69187ff44b..27248a3636 100644
--- a/lib/mdx-page.ts
+++ b/lib/mdx-page.ts
@@ -109,10 +109,10 @@ export function buildSectionMetadata(
   const pagePath = `/${section}${slug.length > 0 ? `/${slug.join("/")}` : ""}`;
   const canonicalUrl =
     pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath);
-  const rawTitle = pageData.seoTitle || page.data.title;
-  const seoTitle = pageData.seoTitle ? rawTitle : enrichOgTitle(rawTitle, slug, sectionTitle);
+  const seoTitle = pageData.seoTitle || page.data.title;
+  const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle);
   const ogImage = buildOgImageUrl({
-    title: seoTitle,
+    title: ogTitle,
     description: page.data.description,
     section: sectionTitle,
     staticOgImage: pageData.ogImage,

From 93b33484cff138b2d4d175d5a4107e2c9157cc2b Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 28 Apr 2026 10:48:08 +0000
Subject: [PATCH 4/4] Fix isLongTitle CJK threshold: use 105 not
 105/ANALOG_CHAR_EM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The effective char count threshold was 105/0.48 ≈ 219, which could never
fire because the t.length > 105 guard above catches all strings that long.
A 40-char CJK title (visually as wide as ~83 Latin chars) was never routed
to fitTitleLayoutLong. Using 105 directly means 'this title has the visual
weight of a 105-char Latin string.'

Addresses PR review comment from greptile.

Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com>
---
 app/api/og/route.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/api/og/route.tsx b/app/api/og/route.tsx
index 101daedbf6..fe96ce1efb 100644
--- a/app/api/og/route.tsx
+++ b/app/api/og/route.tsx
@@ -489,7 +489,7 @@ function fitTitleLayoutLongAtLineCount(
 function isLongTitle(title: string): boolean {
   const t = title.trim();
   if (t.length > 105) return true;
-  if (hasCjk(t) && effectiveCharCount(t, ANALOG_CHAR_EM) > 105 / ANALOG_CHAR_EM) return true;
+  if (hasCjk(t) && effectiveCharCount(t, ANALOG_CHAR_EM) > 105) return true;
   const words = t.split(/\s+/).filter(Boolean);
   return words.length > 14;
 }