Skip to content

Commit 17e50c1

Browse files
hotfix: fix the search engine crash (#22)
1 parent df9bd65 commit 17e50c1

1 file changed

Lines changed: 111 additions & 23 deletions

File tree

scripts/build.ts

Lines changed: 111 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -391,50 +391,137 @@ function findSearchIndexFiles(dir: string): Map<'root' | 'en', string> {
391391
return result
392392
}
393393

394-
function extractSearchDocs(indexPath: string): Array<Record<string, unknown>> {
394+
/**
 * Shape of the serialized search index that this script reads from built
 * index chunks, merges, and writes back.
 *
 * NOTE(review): the field names look like MiniSearch's `toJSON()` output —
 * confirm against the library version the site ships.
 */
type SerializedSearchIndex = {
  /** Number of documents contained in the index. */
  documentCount: number
  /** Next numeric document id to hand out. */
  nextId: number
  /** Internal document id → document identifier (the merge code treats the value as a URL). */
  documentIds: Record<string, string>
  /** Field name → numeric field id. */
  fieldIds: Record<string, number>
  /** Internal document id → per-field lengths, indexed by field id. */
  fieldLength: Record<string, number[]>
  /** Mean field length across all documents, indexed by field id. */
  averageFieldLength: number[]
  /** Internal document id → stored fields for that document. */
  storedFields: Record<string, Record<string, unknown>>
  /** Summed across inputs when merging; exact meaning is defined by the search library — TODO confirm. */
  dirtCount: number
  /** `[term, postings]` pairs; postings map field id → internal document id → frequency. */
  index: Array<[string, Record<string, Record<string, number>>]>
  /** Version tag of the serialization format. */
  serializationVersion: number
}
406+
407+
/**
 * Locates the trailing `export {` statement of a search-index chunk.
 *
 * Every occurrence of the pattern is scanned and the LAST one wins, so text
 * that merely looks like an export earlier in the file (for example inside
 * the embedded payload) does not truncate the payload.
 *
 * @param content - Full text of the chunk file.
 * @returns Offset where the last match begins (including an optional leading
 *   `;` and whitespace), or `-1` when no match exists.
 */
function findSearchIndexExportStart(content: string): number {
  // Built per call: a `g` regex keeps state in `lastIndex` between `exec` calls.
  const exportPattern = /;?\s*export\s*\{/g
  let exportStart = -1
  for (
    let match = exportPattern.exec(content);
    match !== null;
    match = exportPattern.exec(content)
  ) {
    exportStart = match.index
  }
  return exportStart
}
416+
417+
/**
 * Reads a built search-index chunk and returns the serialized index it wraps.
 *
 * The chunk is expected to have the form
 * `const <name> = <expression>; export { ... }`, where `<expression>`
 * evaluates to a JSON string holding the index.
 *
 * @param indexPath - Path of the chunk file to read.
 * @returns The parsed index, or `null` (after logging a warning) when the
 *   file does not have the expected shape.
 */
function extractSearchIndex(indexPath: string): SerializedSearchIndex | null {
  const content = readFileSync(indexPath, 'utf-8')
  const assignment = content.match(/^const\s+\w+\s*=\s*/)
  const exportStart = findSearchIndexExportStart(content)
  if (!assignment || exportStart === -1) {
    log(` ⚠ Could not parse: ${relative(PROJECT_ROOT, indexPath)}`)
    return null
  }

  // Everything between the `const x =` prefix and the trailing export is the payload expression.
  let expr = content.slice(assignment[0].length, exportStart).trim()
  if (expr.endsWith(';')) expr = expr.slice(0, -1).trim()

  // SECURITY NOTE(review): this evaluates file content as JavaScript. That is
  // only acceptable because the chunks are build artifacts produced locally;
  // never point this at files from an untrusted source.
  const jsonStr: unknown = new Function(`return (${expr})`)()
  if (typeof jsonStr !== 'string') {
    // The payload must be a JSON string; anything else is an unexpected chunk format.
    log(` ⚠ Could not parse: ${relative(PROJECT_ROOT, indexPath)}`)
    return null
  }
  return JSON.parse(jsonStr) as SerializedSearchIndex
}
430+
431+
/**
 * Merges several serialized search indexes into a single one without
 * re-indexing any document.
 *
 * The first index defines the field layout (`fieldIds`) and the
 * `serializationVersion` of the result. Documents are renumbered
 * sequentially in input order, and field ids of later indexes are remapped
 * by field name onto the first index's ids.
 *
 * @param indexes - Indexes to merge; must contain at least one entry.
 * @returns A new index containing every document and posting of the inputs.
 * @throws Error when `indexes` is empty, or when an input declares a field
 *   name that the first index does not have.
 */
function mergeSerializedSearchIndexes(indexes: SerializedSearchIndex[]): SerializedSearchIndex {
  if (indexes.length === 0) throw new Error('No search indexes to merge')

  const fieldIds = indexes[0].fieldIds
  const fieldCount = Object.keys(fieldIds).length
  const merged: SerializedSearchIndex = {
    documentCount: 0,
    nextId: 0,
    documentIds: {},
    fieldIds,
    fieldLength: {},
    averageFieldLength: Array(fieldCount).fill(0),
    storedFields: {},
    dirtCount: 0,
    index: [],
    serializationVersion: indexes[0].serializationVersion,
  }

  // term → target field id → global document id → frequency
  const termIndex = new Map<string, Record<string, Record<string, number>>>()
  // Running per-field totals, used to compute averageFieldLength at the end.
  const fieldLengthSums: number[] = Array(fieldCount).fill(0)

  for (const data of indexes) {
    // Per-input translation tables: local document id → global id, local field id → target field id.
    const localToGlobal = new Map<string, string>()
    const fieldMap = new Map<string, string>()

    for (const [fieldName, localFieldId] of Object.entries(data.fieldIds)) {
      const targetFieldId = fieldIds[fieldName]
      if (targetFieldId === undefined) {
        throw new Error(`Incompatible search field: ${fieldName}`)
      }
      fieldMap.set(String(localFieldId), String(targetFieldId))
    }

    for (const [localId, url] of Object.entries(data.documentIds)) {
      const globalId = String(merged.nextId++)
      localToGlobal.set(localId, globalId)
      merged.documentIds[globalId] = url
      merged.storedFields[globalId] = data.storedFields[localId] ?? {}

      const lengths = data.fieldLength[localId] ?? []
      const remappedLengths: number[] = Array(fieldCount).fill(0)
      for (const [localFieldId, targetFieldId] of fieldMap) {
        const len = lengths[Number(localFieldId)] ?? 0
        const targetIndex = Number(targetFieldId)
        remappedLengths[targetIndex] = len
        fieldLengthSums[targetIndex] += len
      }
      merged.fieldLength[globalId] = remappedLengths
    }

    merged.dirtCount += data.dirtCount ?? 0

    for (const [term, postings] of data.index) {
      const mergedPostings = termIndex.get(term) ?? {}
      for (const [localFieldId, docs] of Object.entries(postings)) {
        const targetFieldId = fieldMap.get(localFieldId)
        // Postings for a field id this input never declared cannot be remapped.
        if (targetFieldId === undefined) continue
        const fieldPostings = mergedPostings[targetFieldId] ?? {}
        for (const [localId, frequency] of Object.entries(docs)) {
          const globalId = localToGlobal.get(localId)
          // Postings that point at a document missing from documentIds are dropped.
          if (globalId === undefined) continue
          fieldPostings[globalId] = (fieldPostings[globalId] ?? 0) + frequency
        }
        mergedPostings[targetFieldId] = fieldPostings
      }
      termIndex.set(term, mergedPostings)
    }
  }

  merged.documentCount = Object.keys(merged.documentIds).length
  merged.averageFieldLength = fieldLengthSums.map((sum) =>
    merged.documentCount > 0 ? sum / merged.documentCount : 0,
  )
  merged.index = [...termIndex.entries()]
  return merged
}
414503

415-
async function buildSearchIndexJs(docs: Array<Record<string, unknown>>): Promise<string> {
416-
const MiniSearch = require('minisearch')
417-
const ms = new MiniSearch({ fields: ['title', 'titles', 'text'], storeFields: ['title', 'titles'] })
418-
ms.addAll(docs)
419-
const json = JSON.stringify(ms.toJSON())
504+
/**
 * Renders a serialized search index as the source of an ES module chunk.
 *
 * The module has the form `const e="<json>";export{e as default};`, i.e. its
 * default export is the index as a JSON string.
 *
 * @param index - Index to embed.
 * @returns JavaScript source text of the chunk.
 */
function buildSearchIndexJs(index: SerializedSearchIndex): string {
  const json = JSON.stringify(index)
  // Double-stringify to get a properly escaped JS string literal (handles backticks, quotes, etc.)
  return `const e=${JSON.stringify(json)};export{e as default};`
}
423509

424510
async function mergeSearchIndexes(sources: SearchIndexSource[], finalDist: string) {
425511
logStep('Step 3/4: Merging search indexes')
426512

427-
const docsByLang: Record<'zh' | 'en', Array<Record<string, unknown>>> = { zh: [], en: [] }
513+
const indexesByLang: Record<'zh' | 'en', SerializedSearchIndex[]> = { zh: [], en: [] }
428514
const targetsByLang: Record<'zh' | 'en', Set<string>> = { zh: new Set(), en: new Set() }
429515

430516
for (const source of sources) {
431517
for (const [locale, indexPath] of findSearchIndexFiles(source.dir)) {
432518
const lang = source.lang === 'mixed'
433519
? (locale === 'en' ? 'en' : 'zh')
434520
: source.lang
435-
const docs = extractSearchDocs(indexPath)
436-
log(` ${lang}: ${docs.length} docs from ${relative(PROJECT_ROOT, source.dir)} (${locale})`)
437-
docsByLang[lang].push(...docs)
521+
const index = extractSearchIndex(indexPath)
522+
if (!index) continue
523+
log(` ${lang}: ${index.documentCount} docs from ${relative(PROJECT_ROOT, source.dir)} (${locale})`)
524+
indexesByLang[lang].push(index)
438525

439526
const target = join(finalDist, 'assets', 'chunks', basename(indexPath))
440527
if (existsSync(target)) {
@@ -446,10 +533,11 @@ async function mergeSearchIndexes(sources: SearchIndexSource[], finalDist: strin
446533
}
447534

448535
for (const lang of ['zh', 'en'] as const) {
449-
const allDocs = docsByLang[lang]
450-
if (allDocs.length === 0) { log(` ${lang}: no docs, skipping`); continue }
451-
log(` ${lang}: merging ${allDocs.length} total docs...`)
452-
const js = await buildSearchIndexJs(allDocs)
536+
const indexes = indexesByLang[lang]
537+
if (indexes.length === 0) { log(` ${lang}: no indexes, skipping`); continue }
538+
const mergedIndex = mergeSerializedSearchIndexes(indexes)
539+
log(` ${lang}: merging ${mergedIndex.documentCount} total docs...`)
540+
const js = buildSearchIndexJs(mergedIndex)
453541
const allTargets = [...targetsByLang[lang]]
454542
if (allTargets.length === 0) {
455543
log(` ⚠ ${lang}: no target index files in final dist!`)

0 commit comments

Comments
 (0)