@@ -5,6 +5,156 @@ import { SiteSearchIndexItem } from '../types/site-search';
55import parseFrontmatter from '../utils/parseFrontmatter' ;
66import truncateHtml from '../utils/truncateHtml' ;
77
/**
 * Strip markdown/MDX syntax to produce plain text for search indexing.
 *
 * Block-level markers (headings, blockquotes, list bullets, ordered-list
 * numbers, ESM imports) are only removed at the start of a line, so prose such
 * as "in 2020. The", "a - b" or "x > y" is left intact. Code spans and fenced
 * code blocks are dropped entirely (markers and content).
 *
 * @param md Raw markdown/MDX source.
 * @returns Single-line plain text with runs of whitespace collapsed to one
 *   space and no leading/trailing whitespace.
 */
function stripMarkdown(md: string): string {
  return (
    md
      .replace(/^---[\s\S]*?---\n?/, '') // leading frontmatter block
      .replace(/^[ \t]*import\s+.*?from\s+['"].*?['"]\s*;?\n?/gm, '') // ESM import lines
      // Code goes first so that its contents ("<", "*", "#", ...) cannot be
      // mistaken for tags or markdown markers by the later passes.
      .replace(/`{1,3}[^`]*`{1,3}/g, '') // inline/fenced code
      .replace(/<[^>]+>/g, '') // HTML/JSX tags
      .replace(/!\[.*?\]\(.*?\)/g, '') // images
      .replace(/\[([^\]]*)\]\([^)]*\)/g, '$1') // links → link text
      // Line-anchored block markers. They run before the emphasis pass so a
      // "* " bullet is never paired with an inline "*emphasis*" marker.
      .replace(/^[ \t]*#{1,6}\s+/gm, '') // heading markers
      .replace(/^[ \t]*(?:>[ \t]?)+/gm, '') // blockquote markers (possibly nested)
      .replace(/^[ \t]*[-*+]\s+/gm, '') // unordered list markers
      .replace(/^[ \t]*\d+\.\s+/gm, '') // ordered list markers
      .replace(/(\*{1,3}|_{1,3})(.*?)\1/g, '$2') // bold/italic → inner text
      .replace(/\s+/g, ' ') // collapse all whitespace, including blank lines
      .trim()
  );
}
28+
/**
 * Recursively find all .mdx/.md files in a directory.
 *
 * Entries are visited in name order at every level so the result does not
 * depend on the order in which the file system happens to list them.
 *
 * @param dir Directory to scan.
 * @returns Paths (each built by joining `dir` with the entry names) of every
 *   file whose name ends in `.mdx` or `.md`; an empty array when `dir` does
 *   not exist.
 */
function findAllMdxFiles(dir: string): string[] {
  if (!fs.existsSync(dir)) return [];

  const entries = fs
    .readdirSync(dir, { withFileTypes: true })
    // Plain code-unit comparison: deterministic and locale-independent.
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  const results: string[] = [];
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      results.push(...findAllMdxFiles(fullPath));
    } else if (entry.name.endsWith('.mdx') || entry.name.endsWith('.md')) {
      // The extension test already excludes non-markdown artifacts such as
      // index.json, so no further name filtering is needed.
      results.push(fullPath);
    }
  }
  return results;
}
50+
/**
 * Map a content file path to its site URL.
 *
 * e.g. content/about/what-is-reactome.mdx  → /about/what-is-reactome
 *      content/about/news/article-1.mdx    → /about/news/article-1
 *      content/documentation/dev/index.mdx → /documentation/dev
 *      content/index.mdx                   → /
 *
 * @param filePath Path of the .md/.mdx file.
 * @param contentRoot Directory the URL is computed relative to.
 * @returns A "/"-separated URL path beginning with "/".
 */
function filePathToUrl(filePath: string, contentRoot: string): string {
  const relativeUrl = path
    .relative(contentRoot, filePath)
    // Normalise separators first so the "index" rule below also applies to
    // backslash-separated (Windows) paths.
    .replace(/\\/g, '/')
    .replace(/\.(mdx|md)$/, '')
    // An index file stands for its directory, including the content root.
    .replace(/(^|\/)index$/, '');
  return '/' + relativeUrl;
}
66+
/**
 * Human-readable category for each known top-level content directory.
 * A Map is used rather than an object literal so that lookups with arbitrary
 * URL segments (e.g. "constructor", "toString") never resolve to inherited
 * Object.prototype members.
 */
const TOP_LEVEL_CATEGORIES: ReadonlyMap<string, string> = new Map([
  ['about', 'About'],
  ['content', 'Content'],
  ['documentation', 'Documentation'],
  ['community', 'Community'],
  ['tools', 'Tools'],
]);

/**
 * Infer a human-readable category from a site URL.
 *
 * URLs under /about/news/ and /content/reactome-research-spotlight/ get their
 * own categories; everything else is classified by its first path segment.
 *
 * @param url Site URL path beginning with "/".
 * @returns The category label, or 'Other' when the first segment is unknown.
 */
function inferCategory(url: string): string {
  // Special sub-categories take precedence over the top-level directory.
  if (url.startsWith('/about/news/')) return 'News';
  if (url.startsWith('/content/reactome-research-spotlight/')) {
    return 'Research Spotlight';
  }
  // Index 0 is the empty string before the leading "/".
  const topDir = url.split('/')[1] ?? '';
  return TOP_LEVEL_CATEGORIES.get(topDir) ?? 'Other';
}
85+
/**
 * Generate a consolidated site search index covering all content.
 *
 * Scans `projects/website-angular/content` (relative to the current working
 * directory) for .md/.mdx files, converts each into a SiteSearchIndexItem and
 * writes the resulting array as JSON to
 * `projects/website-angular/public/site-search-index.json`.
 *
 * When several files map to the same URL, only the first one encountered is
 * indexed. If the content directory does not exist, a warning is logged and
 * nothing is written.
 */
function generateSiteSearchIndex(): void {
  const projectRoot = path.resolve(
    process.cwd(),
    'projects',
    'website-angular'
  );
  const contentRoot = path.join(projectRoot, 'content');

  if (!fs.existsSync(contentRoot)) {
    console.warn('Content directory not found:', contentRoot);
    return;
  }

  const items: SiteSearchIndexItem[] = [];
  const seenUrls = new Set<string>();

  for (const filePath of findAllMdxFiles(contentRoot)) {
    const url = filePathToUrl(filePath, contentRoot);

    // Skip duplicates (e.g. collaboration.mdx and collaboration/index.mdx)
    // before reading the file, so duplicates cost no I/O or parsing.
    if (seenUrls.has(url)) continue;
    seenUrls.add(url);

    const { frontmatter, body } = parseFrontmatter(
      fs.readFileSync(filePath, 'utf-8')
    );

    // Fallback title: the last URL segment, so that ".../dev/index.mdx" is
    // titled "dev" rather than "index". If the URL has no usable segment,
    // fall back to the file name without its extension.
    const slug =
      url.split('/').pop() ||
      path.basename(filePath).replace(/\.(mdx|md)$/, '');
    const title = (frontmatter['title'] as string) || slug.replace(/-/g, ' ');

    // NOTE(review): this used to branch on frontmatter['category'] but called
    // inferCategory(url) on both arms, so the frontmatter value never had any
    // effect. Behaviour is kept as-is; wire the frontmatter value in here if
    // an override was intended.
    const category = inferCategory(url);

    const plainBody = stripMarkdown(body);
    const excerpt =
      plainBody.slice(0, 200) + (plainBody.length > 200 ? '...' : '');

    items.push({
      // Sequential 1-based ids in indexing order.
      id: items.length + 1,
      title,
      category,
      url,
      body: plainBody,
      excerpt,
      date: (frontmatter['date'] as string) || undefined,
    });
  }

  // Write to public assets so it can be fetched at runtime.
  // With `recursive: true`, mkdirSync is a no-op when the directory exists.
  const outputDir = path.join(projectRoot, 'public');
  fs.mkdirSync(outputDir, { recursive: true });

  const outputPath = path.join(outputDir, 'site-search-index.json');
  fs.writeFileSync(outputPath, JSON.stringify(items));
  console.log(
    `Site search index generated: ${items.length} entries → ${outputPath}`
  );
}
157+
8158function loadNewsArticlesFromDir ( dir : string ) : ArticleIndexItem [ ] {
9159 if ( ! fs . existsSync ( dir ) ) return [ ] ;
10160
@@ -19,9 +169,7 @@ function loadNewsArticlesFromDir(dir: string): ArticleIndexItem[] {
19169 const { frontmatter, body } = parseFrontmatter ( content ) ;
20170
21171 return {
22- title :
23- frontmatter [ 'title' ] ||
24- filename . replace ( / \. ( m d x | m d ) $ / , '' ) ,
172+ title : frontmatter [ 'title' ] || filename . replace ( / \. ( m d x | m d ) $ / , '' ) ,
25173 author : frontmatter [ 'author' ] || undefined ,
26174 excerpt : truncateHtml ( body || '' , 50 ) ,
27175 date : frontmatter [ 'date' ] || new Date ( ) . toISOString ( ) ,
@@ -36,11 +184,7 @@ function loadNewsArticlesFromDir(dir: string): ArticleIndexItem[] {
36184 : frontmatter [ 'tags' ] ,
37185 } as ArticleIndexItem ;
38186 } )
39- . sort (
40- ( a , b ) =>
41- new Date ( b . date ) . getTime ( ) -
42- new Date ( a . date ) . getTime ( )
43- ) ;
187+ . sort ( ( a , b ) => new Date ( b . date ) . getTime ( ) - new Date ( a . date ) . getTime ( ) ) ;
44188}
45189
46190function buildRecursiveIndex ( dir : string ) : any {
@@ -73,10 +217,7 @@ function buildRecursiveIndex(dir: string): any {
73217/**
74218 * Generate a JSON file with optional recursive indexing
75219 */
76- function generateIndex (
77- directories : string [ ] ,
78- recursive : boolean = true
79- ) : void {
220+ function generateIndex ( directories : string [ ] , recursive : boolean = true ) : void {
80221 const outputDir = path . resolve ( process . cwd ( ) , ...directories ) ;
81222
82223 if ( ! fs . existsSync ( outputDir ) ) {
@@ -94,5 +235,15 @@ function generateIndex(
94235
// Run on module load: build each per-section index in order, then the
// consolidated site search index.
for (const sectionPath of [
  ['projects', 'website-angular', 'content', 'about', 'news'],
  [
    'projects',
    'website-angular',
    'content',
    'content',
    'reactome-research-spotlight',
  ],
  ['projects', 'website-angular', 'content', 'documentation', 'faq'],
]) {
  // `true` is also generateIndex's default for `recursive`.
  generateIndex(sectionPath, true);
}
generateSiteSearchIndex();
0 commit comments