From 9d48e596ea5fdeff8daf63586cdbba47a4fd0df5 Mon Sep 17 00:00:00 2001 From: Charlie Luo Date: Mon, 1 Jun 2026 16:19:51 -0700 Subject: [PATCH] fix(md-exports): clean up artifacts in API docs markdown exports Two sources of noise leaked into the API .md exports, e.g. `* (string)*`: - React inserts empty `<!-- -->` separator comments between adjacent text nodes in server-rendered output (common on component-rendered pages like the API docs). Drop comment nodes in the HTML->markdown pipeline; comments dispatch by node type, so this lives in nodeHandlers, not handlers. - The parameter type was wrapped in an `<em>` with a leading space inside it, which remark encoded as `&#x20;`. Move the space outside the `<em>` so it serializes as a normal space. Also bump CACHE_VERSION, since cached entries are keyed on input HTML (not on this script) and would otherwise serve stale output after the logic change. Co-Authored-By: Claude --- scripts/generate-md-exports.mjs | 8 +++++++- scripts/generate-md-exports.test.mjs | 23 +++++++++++++++++++++++ src/components/apiPage/index.tsx | 3 +-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index e07e71fc4ef6d3..a1f505d6a7ca6e 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -32,7 +32,7 @@ import {rehypeExpandCodeTabs} from './rehype-expand-code-tabs.mjs'; const DOCS_ORIGIN = process.env.NEXT_PUBLIC_DEVELOPER_DOCS ? 'https://develop.sentry.dev' : 'https://docs.sentry.io'; -const CACHE_VERSION = 9; +const CACHE_VERSION = 10; const CACHE_COMPRESS_LEVEL = 4; const R2_BUCKET = process.env.NEXT_PUBLIC_DEVELOPER_DOCS ? 'sentry-develop-docs' @@ -1009,6 +1009,12 @@ async function genMDFromHTML(source, {cacheDir, noCache, usedCacheFiles}) { .use(rehypeExpandCodeTabs) .use(rehypeRemark, { document: false, + // Drop React's empty `<!-- -->` text-node separators, which otherwise leak into the + // markdown on component-rendered pages like the API docs. 
Comments dispatch by node + // type, so this must live in nodeHandlers rather than handlers. + nodeHandlers: { + comment() {}, + }, handlers: { // HACK: Extract the canonical URL during parsing link: (_state, node) => { diff --git a/scripts/generate-md-exports.test.mjs b/scripts/generate-md-exports.test.mjs index 65074eb9116e6e..30b09b776afcf4 100644 --- a/scripts/generate-md-exports.test.mjs +++ b/scripts/generate-md-exports.test.mjs @@ -15,6 +15,9 @@ function htmlToMarkdown(html) { .use(rehypeExpandCodeTabs) .use(rehypeRemark, { document: false, + nodeHandlers: { + comment() {}, + }, handlers: { button() {}, }, @@ -306,3 +309,23 @@ describe('rehypeExpandCodeTabs', () => { }); }); }); + +describe('comment stripping', () => { + it('drops React text-node separator comments emitted on component-rendered pages', () => { + const html = + '

organization_id_or_slug (string)

'; + + const md = htmlToMarkdown(html); + + expect(md).not.toContain('after

'); + + expect(md).not.toContain('