@@ -391,50 +391,137 @@ function findSearchIndexFiles(dir: string): Map<'root' | 'en', string> {
391391 return result
392392}
393393
394- function extractSearchDocs ( indexPath : string ) : Array < Record < string , unknown > > {
/** Occurrences of one term in one field: internal document id → frequency. */
type SearchFieldPostings = Record<string, number>

/** All postings of one term: field id (as a string key) → per-document frequencies. */
type SearchTermPostings = Record<string, SearchFieldPostings>

/**
 * JSON payload embedded in a search index chunk.
 *
 * NOTE(review): this presumably mirrors what MiniSearch's `toJSON()` emits —
 * confirm against the MiniSearch version in use.
 */
type SerializedSearchIndex = {
  /** Number of documents in the index. */
  documentCount: number
  /** Counter from which the next internal document id is allocated. */
  nextId: number
  /** Internal document id → document URL. */
  documentIds: Record<string, string>
  /** Field name → numeric field id. */
  fieldIds: Record<string, number>
  /** Internal document id → field lengths, positioned by field id. */
  fieldLength: Record<string, number[]>
  /** Average field length across all documents, positioned by field id. */
  averageFieldLength: number[]
  /** Internal document id → fields stored alongside the document. */
  storedFields: Record<string, Record<string, unknown>>
  /** NOTE(review): looks like a bookkeeping counter for stale entries — confirm. */
  dirtCount: number
  /** One `[term, postings]` entry per indexed term. */
  index: Array<[string, SearchTermPostings]>
  /** Version tag of the serialization format. */
  serializationVersion: number
}
406+
/**
 * Finds where the final `export {` statement of a chunk begins.
 *
 * The last occurrence is used rather than the first. The returned offset
 * includes an optional `;` and any whitespace directly in front of the
 * `export` keyword.
 *
 * @param content Full text of the chunk file.
 * @returns Offset of the last match, or -1 when the text has no `export {`.
 */
function findSearchIndexExportStart(content: string): number {
  const exportPattern = /;?\s*export\s*\{/g
  let lastStart = -1
  for (const found of content.matchAll(exportPattern)) {
    // Matches arrive in ascending order, so the final assignment wins.
    if (found.index !== undefined) lastStart = found.index
  }
  return lastStart
}
416+
/**
 * Reads a search index chunk from disk and returns the serialized index it embeds.
 *
 * The chunk must start with `const <name> = <expr>` and contain an `export {`
 * statement. Everything between the assignment and the last `export {` is
 * taken as `<expr>`, evaluated, and the resulting string is parsed as JSON.
 *
 * @param indexPath Path of the chunk file to read.
 * @returns The parsed index, or `null` (after logging a warning) when the
 *   file does not have the expected shape.
 */
function extractSearchIndex(indexPath: string): SerializedSearchIndex | null {
  const source = readFileSync(indexPath, 'utf-8')
  const header = /^const\s+\w+\s*=\s*/.exec(source)
  const exportStart = findSearchIndexExportStart(source)

  if (header === null || exportStart === -1) {
    log(` ⚠ Could not parse: ${relative(PROJECT_ROOT, indexPath)}`)
    return null
  }

  // Isolate the right-hand side of the assignment, minus any trailing `;`.
  const rawExpr = source.slice(header[0].length, exportStart).trim()
  const expr = rawExpr.endsWith(';') ? rawExpr.slice(0, -1).trim() : rawExpr

  // NOTE(review): this executes file content as JavaScript; only point it at
  // build output you trust.
  const evaluate = new Function(`return (${expr})`)
  const jsonStr: string = evaluate()
  return JSON.parse(jsonStr)
}
430+
/**
 * Combines several serialized search indexes into a single one.
 *
 * The first index supplies the field-id layout and the serialization version.
 * Documents from every index get fresh consecutive ids starting at 0, in input
 * order. Field lengths and term postings are rewritten to the merged document
 * ids and to the first index's field ids, and per-field average lengths are
 * recomputed over all documents.
 *
 * @param indexes Indexes to merge; must contain at least one entry.
 * @returns A new index covering the documents of every input.
 * @throws Error when `indexes` is empty, or when an index declares a field
 *   name that the first index does not have.
 */
function mergeSerializedSearchIndexes(indexes: SerializedSearchIndex[]): SerializedSearchIndex {
  if (indexes.length === 0) throw new Error('No search indexes to merge')

  const base = indexes[0]
  const fieldIds = base.fieldIds
  const fieldCount = Object.keys(fieldIds).length

  const documentIds: Record<string, string> = {}
  const storedFields: Record<string, Record<string, unknown>> = {}
  const fieldLength: Record<string, number[]> = {}
  const postingsByTerm = new Map<string, Record<string, Record<string, number>>>()
  const lengthTotals: number[] = new Array<number>(fieldCount).fill(0)
  let nextId = 0
  let dirtCount = 0

  for (const source of indexes) {
    // Source field id → merged field id (both kept as strings, like the posting keys).
    const fieldRemap = new Map<string, string>()
    for (const [fieldName, sourceFieldId] of Object.entries(source.fieldIds)) {
      const mergedFieldId = fieldIds[fieldName]
      if (mergedFieldId === undefined) {
        throw new Error(`Incompatible search field: ${fieldName}`)
      }
      fieldRemap.set(String(sourceFieldId), String(mergedFieldId))
    }

    // Source document id → merged document id.
    const docRemap = new Map<string, string>()
    for (const [sourceDocId, url] of Object.entries(source.documentIds)) {
      const mergedDocId = String(nextId++)
      docRemap.set(sourceDocId, mergedDocId)
      documentIds[mergedDocId] = url
      storedFields[mergedDocId] = source.storedFields[sourceDocId] || {}

      const sourceLengths = source.fieldLength[sourceDocId] || []
      const mergedLengths: number[] = new Array<number>(fieldCount).fill(0)
      fieldRemap.forEach((mergedFieldId, sourceFieldId) => {
        const length = sourceLengths[Number(sourceFieldId)] || 0
        const slot = Number(mergedFieldId)
        mergedLengths[slot] = length
        lengthTotals[slot] += length
      })
      fieldLength[mergedDocId] = mergedLengths
    }

    dirtCount += source.dirtCount || 0

    for (const [term, sourcePostings] of source.index) {
      const termPostings = postingsByTerm.get(term) || {}
      for (const [sourceFieldId, frequencies] of Object.entries(sourcePostings)) {
        const mergedFieldId = fieldRemap.get(sourceFieldId)
        // Postings for a field id the source never declared are dropped.
        if (mergedFieldId === undefined) continue
        const fieldPostings = termPostings[mergedFieldId] || {}
        for (const [sourceDocId, frequency] of Object.entries(frequencies)) {
          const mergedDocId = docRemap.get(sourceDocId)
          // Postings that reference an unknown document are dropped.
          if (mergedDocId === undefined) continue
          fieldPostings[mergedDocId] = (fieldPostings[mergedDocId] || 0) + frequency
        }
        termPostings[mergedFieldId] = fieldPostings
      }
      postingsByTerm.set(term, termPostings)
    }
  }

  const documentCount = Object.keys(documentIds).length
  // Key order is kept stable so the serialized JSON layout does not change.
  return {
    documentCount,
    nextId,
    documentIds,
    fieldIds,
    fieldLength,
    averageFieldLength: lengthTotals.map((total) => (documentCount > 0 ? total / documentCount : 0)),
    storedFields,
    dirtCount,
    index: [...postingsByTerm.entries()],
    serializationVersion: base.serializationVersion,
  }
}
414503
/**
 * Renders a serialized search index as the source text of an ES module chunk.
 *
 * The module holds the index as a JSON string in a constant and exports that
 * constant as its default export.
 *
 * @param index Index to embed.
 * @returns JavaScript source of the chunk.
 */
function buildSearchIndexJs(index: SerializedSearchIndex): string {
  // Inner stringify produces the JSON text. Outer stringify turns that text
  // into an escaped JS string literal (quotes, backslashes, newlines, ...).
  const literal = JSON.stringify(JSON.stringify(index))
  return 'const e=' + literal + ';export{e as default};'
}
423509
424510async function mergeSearchIndexes ( sources : SearchIndexSource [ ] , finalDist : string ) {
425511 logStep ( 'Step 3/4: Merging search indexes' )
426512
427- const docsByLang : Record < 'zh' | 'en' , Array < Record < string , unknown > > > = { zh : [ ] , en : [ ] }
513+ const indexesByLang : Record < 'zh' | 'en' , SerializedSearchIndex [ ] > = { zh : [ ] , en : [ ] }
428514 const targetsByLang : Record < 'zh' | 'en' , Set < string > > = { zh : new Set ( ) , en : new Set ( ) }
429515
430516 for ( const source of sources ) {
431517 for ( const [ locale , indexPath ] of findSearchIndexFiles ( source . dir ) ) {
432518 const lang = source . lang === 'mixed'
433519 ? ( locale === 'en' ? 'en' : 'zh' )
434520 : source . lang
435- const docs = extractSearchDocs ( indexPath )
436- log ( ` ${ lang } : ${ docs . length } docs from ${ relative ( PROJECT_ROOT , source . dir ) } (${ locale } )` )
437- docsByLang [ lang ] . push ( ...docs )
521+ const index = extractSearchIndex ( indexPath )
522+ if ( ! index ) continue
523+ log ( ` ${ lang } : ${ index . documentCount } docs from ${ relative ( PROJECT_ROOT , source . dir ) } (${ locale } )` )
524+ indexesByLang [ lang ] . push ( index )
438525
439526 const target = join ( finalDist , 'assets' , 'chunks' , basename ( indexPath ) )
440527 if ( existsSync ( target ) ) {
@@ -446,10 +533,11 @@ async function mergeSearchIndexes(sources: SearchIndexSource[], finalDist: strin
446533 }
447534
448535 for ( const lang of [ 'zh' , 'en' ] as const ) {
449- const allDocs = docsByLang [ lang ]
450- if ( allDocs . length === 0 ) { log ( ` ${ lang } : no docs, skipping` ) ; continue }
451- log ( ` ${ lang } : merging ${ allDocs . length } total docs...` )
452- const js = await buildSearchIndexJs ( allDocs )
536+ const indexes = indexesByLang [ lang ]
537+ if ( indexes . length === 0 ) { log ( ` ${ lang } : no indexes, skipping` ) ; continue }
538+ const mergedIndex = mergeSerializedSearchIndexes ( indexes )
539+ log ( ` ${ lang } : merging ${ mergedIndex . documentCount } total docs...` )
540+ const js = buildSearchIndexJs ( mergedIndex )
453541 const allTargets = [ ...targetsByLang [ lang ] ]
454542 if ( allTargets . length === 0 ) {
455543 log ( ` ⚠ ${ lang } : no target index files in final dist!` )
0 commit comments