diff --git a/scripts/__tests__/frontmatter-check.test.mjs b/scripts/__tests__/frontmatter-check.test.mjs new file mode 100644 index 00000000..b8546365 --- /dev/null +++ b/scripts/__tests__/frontmatter-check.test.mjs @@ -0,0 +1,19 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { frontmatterError } from '../check-mdx-parse.mjs'; + +test('frontmatterError flags nested-quote frontmatter', () => { + const content = '---\ntitle: "ok"\ndescription: "带有、"显示所有方案"链接。"\n---\n\nBody\n'; + const err = frontmatterError(content); + assert.ok(err, 'expected an error object'); + assert.equal(typeof err.message, 'string'); +}); + +test('frontmatterError returns null for valid frontmatter', () => { + const content = '---\ntitle: "ok"\ndescription: "a clean one"\n---\n\nBody\n'; + assert.equal(frontmatterError(content), null); +}); + +test('frontmatterError returns null when there is no frontmatter', () => { + assert.equal(frontmatterError('Just body text, no frontmatter.\n'), null); +}); diff --git a/scripts/__tests__/nested-quote.test.mjs b/scripts/__tests__/nested-quote.test.mjs new file mode 100644 index 00000000..95bb1dcc --- /dev/null +++ b/scripts/__tests__/nested-quote.test.mjs @@ -0,0 +1,114 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { + fixFrontmatterQuotes, + fixTagAttrQuotes, + fixNestedQuotesInBody, + fixNestedQuotes, + parsesClean, +} from '../lint-mdx.mjs'; + +test('fixFrontmatterQuotes converts inner double-quotes to single quotes', () => { + const fm = 'title: "ok"\ndescription: "构建带有、"显示所有方案"链接的付费墙。"'; + const { result, changed } = fixFrontmatterQuotes(fm); + assert.equal(changed, true); + assert.equal( + result, + 'title: "ok"\ndescription: "构建带有、\'显示所有方案\'链接的付费墙。"', + ); +}); + +test('fixFrontmatterQuotes leaves clean frontmatter untouched', () => { + const fm = 'title: "ok"\ndescription: "a clean description"'; + const { result, changed } = fixFrontmatterQuotes(fm); + 
assert.equal(changed, false); + assert.equal(result, fm); +}); + +test('fixFrontmatterQuotes preserves escaped quotes', () => { + const fm = 'description: "she said \\"hi\\""'; + const { result, changed } = fixFrontmatterQuotes(fm); + assert.equal(changed, false); + assert.equal(result, fm); +}); + +test('fixTagAttrQuotes fixes inner quotes in the last attribute', () => { + const tag = ''; + const { result, changed } = fixTagAttrQuotes(tag); + assert.equal(changed, true); + assert.equal( + result, + '', + ); +}); + +test('fixTagAttrQuotes leaves a clean tag untouched', () => { + const tag = ''; + const { result, changed } = fixTagAttrQuotes(tag); + assert.equal(changed, false); + assert.equal(result, tag); +}); + +test('fixTagAttrQuotes does not flag a valueless/boolean attribute', () => { + // `default` is a boolean attribute — the closing quote of `label` is + // followed by ` default>`, which must be recognized as a boundary. + const tag = ''; + const { result, changed } = fixTagAttrQuotes(tag); + assert.equal(changed, false); + assert.equal(result, tag); +}); + +test('fixTagAttrQuotes leaves a clean tag with trailing boolean attr + close', () => { + const tag = ''; + const { result, changed } = fixTagAttrQuotes(tag); + assert.equal(changed, false); + assert.equal(result, tag); +}); + +test('fixNestedQuotesInBody skips fenced code blocks', () => { + // Use a realistic CJK artifact: a stray inner quote followed by non-ASCII + // text (what the translator actually produces). ASCII continuations are + // intentionally read as adjacent attributes, not inner quotes. + const body = [ + '```jsx', + '', + '```', + '', + ].join('\n'); + const { result, changed } = fixNestedQuotesInBody(body); + assert.equal(changed, true); + const out = result.split('\n'); + // Code-fence line (index 1) preserved verbatim; only the real tag (index 3) fixed. 
+ assert.equal(out[1], ''); + assert.equal(out[3], ''); +}); + +const BROKEN_FILE = [ + '---', + 'title: "ok"', + 'description: "带有、"显示所有方案"链接。"', + '---', + '', + 'Intro.', + '', +].join('\n'); + +test('fixNestedQuotes repairs frontmatter and body together', () => { + const { result, changed } = fixNestedQuotes(BROKEN_FILE); + assert.equal(changed, true); + assert.ok(result.includes('description: "带有、\'显示所有方案\'链接。"')); + assert.ok(result.includes('alt="配置了\'打开 URL\'操作"')); +}); + +test('parsesClean: false for broken file, true after fix', async () => { + assert.equal(await parsesClean(BROKEN_FILE), false); + const { result } = fixNestedQuotes(BROKEN_FILE); + assert.equal(await parsesClean(result), true); +}); + +test('fixNestedQuotes leaves a fully valid file unchanged', () => { + const ok = '---\ntitle: "ok"\n---\n\n\n'; + const { result, changed } = fixNestedQuotes(ok); + assert.equal(changed, false); + assert.equal(result, ok); +}); diff --git a/scripts/check-mdx-parse.mjs b/scripts/check-mdx-parse.mjs index 1a48824d..bc79f9e5 100644 --- a/scripts/check-mdx-parse.mjs +++ b/scripts/check-mdx-parse.mjs @@ -16,9 +16,11 @@ import fs from 'node:fs/promises'; import path from 'node:path'; +import { fileURLToPath } from 'node:url'; import { compile } from '@mdx-js/mdx'; import remarkDirective from 'remark-directive'; import { remarkAside } from '../src/plugins/remark-aside.mjs'; +import yaml from 'js-yaml'; const ROOT = process.cwd(); const SCAN_DIRS = ['src/content/docs', 'src/locales', 'src/components/reusable']; @@ -42,9 +44,39 @@ async function* walk(dir) { } } +// Returns a parse-error descriptor for the file's YAML frontmatter, or null if +// the frontmatter is valid / absent. @mdx-js/mdx does not validate frontmatter, +// so this closes the gap that let bad-YAML translations reach the slow build. 
+export function frontmatterError(content) { + const m = content.match(/^?---\r?\n([\s\S]*?)\r?\n---/); + if (!m) return null; + try { + yaml.load(m[1]); + return null; + } catch (err) { + return { + message: err.message.split('\n')[0], + // js-yaml mark line is 0-based and relative to the frontmatter body; + // +2 maps it to the file (1 for the opening `---`, 1 for 1-based lines). + line: err.mark ? err.mark.line + 2 : null, + column: err.mark ? err.mark.column + 1 : null, + }; + } +} + async function checkFile(file) { + const content = await fs.readFile(file, 'utf-8'); + const fmErr = frontmatterError(content); + if (fmErr) { + return { + file: path.relative(ROOT, file), + message: `frontmatter: ${fmErr.message}`, + line: fmErr.line, + column: fmErr.column, + }; + } try { - await compile(await fs.readFile(file, 'utf-8'), { + await compile(content, { jsx: true, remarkPlugins: [remarkDirective, remarkAside], }); @@ -59,29 +91,37 @@ async function checkFile(file) { } } -const files = []; -for (const dir of SCAN_DIRS) { - for await (const f of walk(path.join(ROOT, dir))) files.push(f); -} +async function main() { + const files = []; + for (const dir of SCAN_DIRS) { + for await (const f of walk(path.join(ROOT, dir))) files.push(f); + } -const issues = []; -for (let i = 0; i < files.length; i += CONCURRENCY) { - const chunk = files.slice(i, i + CONCURRENCY); - const results = await Promise.all(chunk.map(checkFile)); - for (const r of results) if (r) issues.push(r); -} + const issues = []; + for (let i = 0; i < files.length; i += CONCURRENCY) { + const chunk = files.slice(i, i + CONCURRENCY); + const results = await Promise.all(chunk.map(checkFile)); + for (const r of results) if (r) issues.push(r); + } -issues.sort((a, b) => a.file.localeCompare(b.file)); + issues.sort((a, b) => a.file.localeCompare(b.file)); -if (issues.length === 0) { - console.log(`check-mdx-parse: ${files.length} file(s) parsed cleanly`); - process.exit(0); + if (issues.length === 0) { + 
console.log(`check-mdx-parse: ${files.length} file(s) parsed cleanly`); + process.exit(0); + } + + console.error(`check-mdx-parse: ${files.length} scanned, ${issues.length} parse error(s):\n`); + for (const i of issues) { + const loc = i.line ? `${i.file}:${i.line}${i.column ? ':' + i.column : ''}` : i.file; + console.error(` ${loc}`); + console.error(` ${i.message}\n`); + } + process.exit(1); } -console.error(`check-mdx-parse: ${files.length} scanned, ${issues.length} parse error(s):\n`); -for (const i of issues) { - const loc = i.line ? `${i.file}:${i.line}${i.column ? ':' + i.column : ''}` : i.file; - console.error(` ${loc}`); - console.error(` ${i.message}\n`); +const isMain = process.argv[1] + && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url); +if (isMain) { + main(); } -process.exit(1); diff --git a/scripts/lint-mdx.mjs b/scripts/lint-mdx.mjs index 08080138..28bdfe36 100644 --- a/scripts/lint-mdx.mjs +++ b/scripts/lint-mdx.mjs @@ -41,6 +41,11 @@ import fs from 'node:fs/promises'; import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { compile } from '@mdx-js/mdx'; +import remarkDirective from 'remark-directive'; +import { remarkAside } from '../src/plugins/remark-aside.mjs'; +import yaml from 'js-yaml'; const ROOT = process.cwd(); @@ -63,6 +68,7 @@ const FIXABLE_RULES = new Set([ 'locale-import-drift', 'missing-import', 'reusable-missing-import', + 'nested-quote', ]); async function* walkMdx(dir) { @@ -266,6 +272,165 @@ function checkLocaleReusableImportDrift(localeReusables) { return issues; } +// Repair a double-quoted YAML frontmatter scalar whose value contains raw +// (unescaped) inner double-quotes — the translator wraps UI terms in `"` even +// when the English source uses single quotes. Converts each raw inner `"` to +// `'` (matching the English convention) and leaves the delimiters intact. +// Single-line scalars only (title/description/metadataTitle are always one line). 
/**
 * Repair single-line, double-quoted YAML frontmatter scalars that contain raw
 * (unescaped) inner double-quotes by converting each inner `"` to `'`.
 * The outer delimiters and any escaped `\"` sequences are left untouched.
 *
 * @param {string} fmText - Frontmatter text without the `---` fence lines.
 * @returns {{ result: string, changed: boolean }} The repaired text and a flag
 *   that is true only when at least one line was rewritten.
 */
export function fixFrontmatterQuotes(fmText) {
  let changed = false;
  const lines = fmText.split('\n').map((line) => {
    // key: "value" — greedy `.*` makes the LAST quote on the line the delimiter.
    const m = line.match(/^(\s*[\w-]+:\s*)"(.*)"(\s*)$/);
    if (!m) return line;
    const [, pre, val, post] = m;
    // Only act when a raw (non-escaped) inner quote is present.
    if (!val.replace(/\\"/g, '').includes('"')) return line;
    const fixedVal = val.replace(/\\?"/g, (q) => (q === '\\"' ? q : "'"));
    changed = true;
    return `${pre}"${fixedVal}"${post}`;
  });
  return { result: lines.join('\n'), changed };
}

/**
 * Within a single JSX tag string, convert raw inner double-quotes inside a
 * double-quoted attribute value to single quotes.
 *
 * A `"` is treated as the real closing delimiter when it is followed (after
 * optional whitespace) by end-of-string, `>`, `/`, `{`, or an ASCII
 * letter/underscore (the start of another attribute name). Any other `"`
 * inside the value is an inner quote and becomes `'`. Escaped `\"` is kept.
 *
 * If a value never reaches a closing delimiter (for example because the tag
 * fragment was cut at a `>` that lives inside the value), the remainder of the
 * tag is emitted verbatim: no quote is invented and nothing is reported as
 * changed for that value.
 *
 * @param {string} tag - Source text of one JSX tag.
 * @returns {{ result: string, changed: boolean }}
 */
export function fixTagAttrQuotes(tag) {
  const n = tag.length;
  let out = '';
  let changed = false;
  let i = 0;

  // True when the last non-whitespace character emitted so far is `=`,
  // i.e. the upcoming `"` opens an attribute value.
  const prevNonSpaceIsEq = () => {
    for (let k = out.length - 1; k >= 0; k--) {
      if (/\s/.test(out[k])) continue;
      return out[k] === '=';
    }
    return false;
  };

  while (i < n) {
    if (tag[i] !== '"' || !prevNonSpaceIsEq()) {
      out += tag[i];
      i++;
      continue;
    }

    const start = i; // index of the opening delimiter
    i++;
    let value = '';
    let valueChanged = false;
    let closed = false;

    while (i < n) {
      if (tag[i] === '\\' && tag[i + 1] === '"') {
        value += '\\"';
        i += 2;
        continue;
      }
      if (tag[i] === '"') {
        // Look past whitespace to decide: closing delimiter or stray inner quote?
        let j = i + 1;
        while (j < n && /\s/.test(tag[j])) j++;
        const next = j < n ? tag[j] : '';
        const isBoundary = j >= n
          || next === '>' || next === '/' || next === '{'
          || /[A-Za-z_]/.test(next);
        if (isBoundary) {
          closed = true;
          break;
        }
        value += "'"; // inner quote
        valueChanged = true;
        i++;
        continue;
      }
      value += tag[i];
      i++;
    }

    if (!closed) {
      // Unterminated value: keep the original text byte-for-byte instead of
      // appending a closing quote that was never in the input.
      out += tag.slice(start);
      break;
    }

    out += `"${value}"`;
    if (valueChanged) changed = true;
    i++; // skip the closing quote
  }

  return { result: out, changed };
}

/**
 * Apply fixTagAttrQuotes to every JSX tag in the body, line by line, skipping
 * fenced code blocks so code samples are never altered. Inline-code spans are
 * not specially handled.
 *
 * Fence tracking: a fence opens on a line starting with 3+ backticks or
 * tildes. It closes only on a line that uses the same character, is at least
 * as long as the opening fence, and carries nothing but whitespace after it,
 * so a shorter fence or a fence with an info string nested inside a longer
 * fence stays part of the code block.
 *
 * @param {string} body - MDX body text (frontmatter already removed).
 * @returns {{ result: string, changed: boolean }}
 */
export function fixNestedQuotesInBody(body) {
  const tagPattern = /<[A-Za-z][A-Za-z0-9]*\b[^>]*?\/?>/g;
  let changed = false;
  let fence = null; // { char, size } of the currently open fence, or null

  const lines = body.split('\n').map((line) => {
    const f = line.match(/^\s*(`{3,}|~{3,})/);
    if (f) {
      const marker = f[1];
      if (fence === null) {
        fence = { char: marker[0], size: marker.length };
        return line;
      }
      const trailing = line.slice(f[0].length);
      const closes = marker[0] === fence.char
        && marker.length >= fence.size
        && trailing.trim() === '';
      if (closes) fence = null;
      return line;
    }
    if (fence !== null) return line;
    if (!line.includes('"')) return line;
    return line.replace(tagPattern, (tag) => {
      const { result, changed: c } = fixTagAttrQuotes(tag);
      if (c) changed = true;
      return result;
    });
  });

  return { result: lines.join('\n'), changed };
}

/**
 * Split a file into its frontmatter block (without fences) and body.
 *
 * @param {string} content - Full file text.
 * @returns {{ fm: string|null, body: string, fmEnd?: number, head?: string }}
 *   `fm` is null (and `body` is the whole content) when the first line is not
 *   `---` or no closing `---` line exists. Otherwise `fmEnd` is the index of
 *   the closing fence line and `head` is everything up to and including it.
 */
function splitFrontmatter(content) {
  const lines = content.split('\n');
  if (lines[0]?.trim() !== '---') return { fm: null, body: content };
  const fmEnd = lines.findIndex((l, idx) => idx > 0 && l.trim() === '---');
  if (fmEnd === -1) return { fm: null, body: content };
  return {
    fm: lines.slice(1, fmEnd).join('\n'),
    fmEnd,
    head: lines.slice(0, fmEnd + 1).join('\n'),
    body: lines.slice(fmEnd + 1).join('\n'),
  };
}

// Repair nested-quote artifacts across the whole file (frontmatter + body).
+export function fixNestedQuotes(content) { + const parts = splitFrontmatter(content); + if (parts.fm === null) { + return fixNestedQuotesInBody(content); + } + const fmFix = fixFrontmatterQuotes(parts.fm); + const bodyFix = fixNestedQuotesInBody(parts.body); + if (!fmFix.changed && !bodyFix.changed) return { result: content, changed: false }; + const result = `---\n${fmFix.result}\n---\n${bodyFix.result}`; + return { result, changed: true }; +} + +// True if the content has valid frontmatter YAML AND compiles as MDX. +export async function parsesClean(content) { + const parts = splitFrontmatter(content); + if (parts.fm !== null) { + try { yaml.load(parts.fm); } catch { return false; } + } + try { + await compile(content, { jsx: true, remarkPlugins: [remarkDirective, remarkAside] }); + } catch { return false; } + return true; +} + +// Flags files containing nested-quote artifacts. Detection = "the fixer would +// change something", which only happens on genuinely broken constructs (a raw +// inner quote inside a JSX attribute or a double-quoted frontmatter scalar +// always breaks the parser). So this never fires on valid content. +function checkNestedQuotes(file, content) { + if (!fixNestedQuotes(content).changed) return []; + return [{ + file, + line: 0, + rule: 'nested-quote', + message: + 'nested double-quote inside a JSX attribute or frontmatter scalar (translation artifact); inner " must be a single quote', + }]; +} + +async function main() { const allIssues = []; // Map relative path → absolute path so --fix can write repaired content back. @@ -287,6 +452,7 @@ for await (const file of walkMdx(path.join(ROOT, 'src'))) { allIssues.push(...checkBlankAfterImports(rel, lines)); allIssues.push(...checkClientLoadImports(rel, content, lines)); allIssues.push(...checkReusableCalloutDirective(rel, content, lines)); + allIssues.push(...checkNestedQuotes(rel, content)); // Stash locale reusables for the cross-locale drift pass below. 
const reusableMatch = rel.match(/^src\/locales\/([^/]+)\/reusable\/(.+)\.(mdx?)$/); @@ -313,6 +479,21 @@ if (FIX) { for (const [rel, issues] of fixableByFile) { const abs = absByRel.get(rel); if (!abs) continue; + // nested-quote operates on raw content (not line indices) and uses + // fix-then-verify: only write the repair if the result parses cleanly, + // otherwise leave the file for the strict deploy gate to block. + const hasNestedQuote = issues.some(i => i.rule === 'nested-quote'); + if (hasNestedQuote) { + const original = await fs.readFile(abs, 'utf-8'); + const { result, changed } = fixNestedQuotes(original); + if (changed && await parsesClean(result)) { + await fs.writeFile(abs, result, 'utf-8'); + fixedCount += 1; + } + // A nested-quote file has no other line-based fixes queued in practice; + // skip the rest of this iteration to avoid stale line numbers. + continue; + } const lines = (await fs.readFile(abs, 'utf-8')).split('\n'); // locale-import-drift inserts at top-of-body; do those first so existing // line-numbered blank-line fixes (later in the file) stay valid. @@ -396,3 +577,10 @@ for (const i of remainingIssues) { console.error(` [${i.rule}] ${loc} — ${i.message}`); } process.exit(1); +} + +const isMain = process.argv[1] + && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url); +if (isMain) { + main(); +}