66
77import { existsSync } from "../../../deno_ral/fs.ts" ;
88import { basename , join , relative } from "../../../deno_ral/path.ts" ;
9+ import * as pagefind from "pagefind" ;
910
1011// currently not building the index here so not using fuse
1112// @ deno-types="fuse/dist/fuse.d.ts"
@@ -90,22 +91,52 @@ const kLimit = "limit";
// Whether to show the parent in the search results
const kShowItemContext = "show-item-context";

// Any algolia configuration
const kAlgolia = "algolia";

// Key that selects the search engine backend (see SearchEngine)
const kEngine = "engine";

// Key holding the Pagefind-specific configuration, followed by the
// keys that are read from inside that configuration
const kPagefind = "pagefind";
const kRootSelector = "root-selector";
const kExcludeSelectors = "exclude-selectors";
const kForceLanguage = "force-language";

// Key of the nested Pagefind ranking configuration and the numeric
// settings that are read from inside it
const kRanking = "ranking";
const kPageLength = "page-length";
const kTermFrequency = "term-frequency";
const kTermSaturation = "term-saturation";
const kTermSimilarity = "term-similarity";

// The search engine backends that can be selected
export type SearchEngine =
  | "fuse"
  | "pagefind"
  | "algolia";

112+
/**
 * Resolved search configuration, as assembled and returned by
 * `searchOptions`.
 */
interface SearchOptions {
  [kLocation]: SearchInputLocation;
  [kCopyButton]: boolean;
  [kCollapseAfter]: boolean | number;
  [kType]: "textbox" | "overlay";
  [kPanelPlacement]:
    | "start"
    | "end"
    | "full-width"
    | "input-wrapper-width";
  [kLimit]?: number;
  /** The search engine backend. */
  [kEngine]?: SearchEngine;
  /** Any algolia configuration. */
  [kAlgolia]?: SearchOptionsAlgolia;
  /** Pagefind-specific configuration. */
  [kPagefind]?: SearchOptionsPagefind;
  [kLanguageDefaults]?: FormatLanguage;
  [kKbShortcutSearch]?: string[];
  /** Whether to show the parent in the search results. */
  [kShowItemContext]?: boolean | "parent" | "root" | "tree";
}
108127
/**
 * Numeric settings accepted under the Pagefind `ranking` key. Every
 * field is optional.
 */
interface SearchOptionsPagefindRanking {
  [kPageLength]?: number;
  [kTermFrequency]?: number;
  [kTermSaturation]?: number;
  [kTermSimilarity]?: number;
}

/**
 * Pagefind-specific configuration, as produced by `pagefindOptions`.
 * Every field is optional.
 */
interface SearchOptionsPagefind {
  /** Selector handed to Pagefind as `rootSelector`. */
  [kRootSelector]?: string;
  /** Selectors appended to the built-in exclusion list when indexing. */
  [kExcludeSelectors]?: string[];
  /** Value handed to Pagefind as `forceLanguage` when set. */
  [kForceLanguage]?: string;
  [kRanking]?: SearchOptionsPagefindRanking;
}
139+
109140const kSearchOnlyApiKey = "search-only-api-key" ;
110141const kSearchApplicationId = "application-id" ;
111142const kSearchParams = "params" ;
@@ -425,6 +456,172 @@ export async function updateSearchIndex(
425456 }
426457}
427458
/**
 * Builds the Pagefind search bundle for a rendered project.
 *
 * Steps:
 *  1. Resolve the project's pagefind options via `searchOptions`.
 *  2. Annotate the rendered HTML (breadcrumbs, search exclusions).
 *  3. Create a Pagefind index, add the whole output directory to it and
 *     write the bundle to `<output-dir>/pagefind`.
 *
 * Errors reported by Pagefind are surfaced as warnings rather than thrown.
 * The Pagefind backend is always closed, including when index creation
 * fails or a step throws.
 *
 * @param context The project being rendered.
 * @param outputFiles Output files of the render; used for HTML annotation.
 */
export async function runPagefindIndex(
  context: ProjectContext,
  outputFiles: ProjectOutputFile[],
) {
  const outputDir = projectOutputDir(context);

  // Get pagefind-specific options
  const options = await searchOptions(context);
  const pagefindOpts = options?.[kPagefind];

  // Annotate HTML files with breadcrumbs and search exclusions before indexing
  await annotateHtmlForPagefind(context, outputFiles);

  // Build the createIndex config. User supplied exclusions extend (never
  // replace) the defaults.
  const defaultExcludeSelectors = [
    "nav[role='doc-toc']",
    "#title-block-header",
    "script",
    "style",
    ".sidebar",
    ".quarto-title-block",
  ];
  const indexConfig: Record<string, unknown> = {
    // `||` rather than `??` so that an empty selector also falls back
    rootSelector: pagefindOpts?.[kRootSelector] || "main",
    excludeSelectors: [
      ...defaultExcludeSelectors,
      ...(pagefindOpts?.[kExcludeSelectors] ?? []),
    ],
  };
  const forceLanguage = pagefindOpts?.[kForceLanguage];
  if (forceLanguage) {
    indexConfig.forceLanguage = forceLanguage;
  }

  try {
    // Create index
    const { index, errors: createErrors } = await pagefind.createIndex(
      indexConfig,
    );
    if (createErrors.length > 0) {
      warning("Pagefind index creation warnings: " + createErrors.join(", "));
    }
    if (!index) {
      warning("Pagefind failed to create index");
      return;
    }

    // Index the output directory
    const { errors: addErrors } = await index.addDirectory({
      path: outputDir,
    });
    if (addErrors.length > 0) {
      warning("Pagefind indexing warnings: " + addErrors.join(", "));
    }

    // Write the pagefind bundle to the output directory
    const { errors: writeErrors } = await index.writeFiles({
      outputPath: join(outputDir, "pagefind"),
    });
    if (writeErrors.length > 0) {
      warning(
        "Pagefind write warnings: " + writeErrors.join(", "),
      );
    }
  } finally {
    // Always shut the backend down, including on the early return above
    // and when any of the steps throws.
    await pagefind.close();
  }
}
531+
/**
 * Rewrites rendered HTML files in place so that Pagefind picks up
 * search exclusions and breadcrumbs.
 *
 * For every HTML output file:
 *  - pages with `search: false`, or drafts that are not visible in the
 *    current draft mode, get `data-pagefind-ignore="all"` on `<body>`;
 *  - pages that have sidebar breadcrumbs get a `<meta>` element carrying
 *    `data-pagefind-meta="crumbs:<a>||<b>||…"` as first child of `<main>`.
 *
 * Files that need neither annotation are not read at all, and a file is
 * only written back when something was actually changed.
 *
 * @param context The project being rendered.
 * @param outputFiles Output files of the render.
 */
async function annotateHtmlForPagefind(
  context: ProjectContext,
  outputFiles: ProjectOutputFile[],
) {
  const outputDir = projectOutputDir(context);
  const draftMode = projectDraftMode(context);

  for (const outputFile of outputFiles) {
    // Skip non-HTML files
    if (!isHtmlFileOutput(outputFile.format.pandoc)) {
      continue;
    }

    const file = outputFile.file;
    const relPath = relative(outputDir, file);
    const href = pathWithForwardSlashes(relPath);

    // Check for search exclusion (per the original note, this is meant to
    // follow the same logic as updateSearchIndex)
    const inputTarget = await resolveInputTargetForOutputFile(
      context,
      relPath,
    );
    const draft = inputTarget ? inputTarget.draft : false;
    const excluded = outputFile.format.metadata[kSearch] === false ||
      (draft === true && !isDraftVisible(draftMode));

    // Breadcrumbs only depend on the navigation, not on the document, so
    // work them out before touching the file.
    const crumbTexts = pagefindCrumbTexts(`/${href}`, outputFile.format);

    // Nothing to inject: leave the file alone without reading/parsing it
    if (!excluded && crumbTexts.length === 0) {
      continue;
    }

    // Read the HTML
    const html = Deno.readTextFileSync(file);
    const doc = new DOMParser().parseFromString(html, "text/html");
    const root = doc?.documentElement;
    // Without a root element there is nothing we could serialize back;
    // writing would replace the file with an (almost) empty document.
    if (!doc || !root) continue;

    let modified = false;

    // For excluded pages, inject data-pagefind-ignore on <body>
    if (excluded) {
      const body = doc.querySelector("body");
      if (body) {
        (body as Element).setAttribute("data-pagefind-ignore", "all");
        modified = true;
      }
    }

    // Inject breadcrumbs as pagefind metadata at the top of <main>
    if (crumbTexts.length > 0) {
      const mainEl = doc.querySelector("main");
      if (mainEl) {
        const meta = doc.createElement("meta");
        (meta as Element).setAttribute(
          "data-pagefind-meta",
          `crumbs:${crumbTexts.join("||")}`,
        );
        mainEl.insertBefore(meta, mainEl.firstChild);
        modified = true;
      }
    }

    // Write back if modified
    if (modified) {
      // outerHTML does not include the doctype, so carry the original one
      // over. Leading whitespace / a BOM in front of it is tolerated so the
      // doctype is not silently dropped.
      const doctype = html.match(/^\s*<!DOCTYPE[^>]*>/i)?.[0]?.trim();
      const serialized = doctype
        ? `${doctype}\n${root.outerHTML}`
        : root.outerHTML;
      Deno.writeTextFileSync(file, serialized);
    }
  }
}

/**
 * Computes the breadcrumb texts for a page: the sidebar breadcrumbs for
 * `navHref`, optionally prefixed with the text of the navbar item that
 * owns the sidebar (unless `website.search.merge-navbar-crumbs` is `false`
 * or the first crumb already equals that text).
 *
 * @param navHref Site-absolute href of the page (leading `/`).
 * @param format Format of the output file; source of sidebar/navbar data.
 * @returns The crumb texts, or an empty array when the page has no sidebar
 *          or no crumbs with text.
 */
function pagefindCrumbTexts(
  navHref: string,
  format: ProjectOutputFile["format"],
): string[] {
  const sidebar = sidebarForHref(navHref, format);
  if (!sidebar) {
    return [];
  }

  const crumbTexts = breadCrumbs(navHref, sidebar)
    .filter((crumb) => crumb.text !== undefined)
    .map((crumb) => crumb.text as string);
  if (crumbTexts.length === 0) {
    return crumbTexts;
  }

  // Merge navbar crumbs if applicable
  // deno-lint-ignore no-explicit-any
  const mergeNavBarSearchCrumbs = (format.metadata as any)
    ?.website?.search?.["merge-navbar-crumbs"];
  if (mergeNavBarSearchCrumbs !== false) {
    const navItem = navbarItemForSidebar(sidebar, format);
    if (navItem && typeof navItem === "object") {
      const navbarParentText = (navItem as NavigationItemObject).text;
      if (navbarParentText && crumbTexts[0] !== navbarParentText) {
        crumbTexts.unshift(navbarParentText);
      }
    }
  }
  return crumbTexts;
}
624+
// Numeric fallback that searchOptions uses for the collapse-after setting
// (presumably when collapsing is on but no explicit count is configured)
const kDefaultCollapse = 3;
429626
430627export async function searchOptions (
@@ -444,14 +641,23 @@ export async function searchOptions(
444641 ? kDefaultCollapse
445642 : false ;
446643
644+ // Determine the search engine
645+ const algolia = algoliaOptions ( searchMetadata , project ) ;
646+ const engine = searchEngine ( searchMetadata , algolia ) ;
647+ const pagefindOpts = engine === "pagefind"
648+ ? pagefindOptions ( searchMetadata )
649+ : undefined ;
650+
447651 return {
448652 [ kLocation ] : location ,
449653 [ kCopyButton ] : searchMetadata [ kCopyButton ] === true ,
450654 [ kCollapseAfter ] : collapseMatches ,
451655 [ kPanelPlacement ] : location === "navbar" ? "end" : "start" ,
452656 [ kType ] : searchType ( searchMetadata [ kType ] , location ) ,
453657 [ kLimit ] : searchInputLimit ( searchMetadata ) ,
454- [ kAlgolia ] : algoliaOptions ( searchMetadata , project ) ,
658+ [ kEngine ] : engine ,
659+ [ kAlgolia ] : algolia ,
660+ [ kPagefind ] : pagefindOpts ,
455661 [ kKbShortcutSearch ] : searchKbdShortcut ( searchMetadata ) ,
456662 [ kShowItemContext ] : searchShowItemContext ( searchMetadata ) ,
457663 } ;
@@ -565,6 +771,61 @@ function algoliaOptions(
565771 }
566772}
567773
/**
 * Determines which search engine backend to use.
 *
 * An explicitly configured `engine` wins when it names one of the known
 * backends. Otherwise algolia is selected whenever algolia options are
 * present (backward compatibility), and fuse is the final fallback.
 *
 * @param searchConfig Raw search configuration.
 * @param algolia Resolved algolia options, if any.
 * @returns The selected search engine.
 */
function searchEngine(
  searchConfig: Record<string, unknown>,
  algolia: SearchOptionsAlgolia | undefined,
): SearchEngine {
  const knownEngines: readonly SearchEngine[] = ["pagefind", "algolia", "fuse"];
  const requested = searchConfig[kEngine];

  // Explicit, recognised choice
  const explicit = knownEngines.find((name) => name === requested);
  if (explicit !== undefined) {
    return explicit;
  }

  // Auto-detect algolia when algolia config is present (backward compat)
  return algolia ? "algolia" : "fuse";
}
790+
/**
 * Extracts and validates the Pagefind-specific options from the raw
 * search configuration.
 *
 * Only well-typed values are carried over:
 *  - `root-selector` and `force-language` must be strings;
 *  - `exclude-selectors` must be an array; entries that are not non-empty
 *    strings are dropped instead of being handed to Pagefind;
 *  - `ranking` must be a mapping; each weight must be a finite number.
 *
 * @param searchConfig Raw search configuration.
 * @returns The validated options, or `undefined` when no `pagefind`
 *          mapping is configured.
 */
function pagefindOptions(
  searchConfig: Record<string, unknown>,
): SearchOptionsPagefind | undefined {
  // Arrays are objects too, but never a valid options mapping
  const isMapping = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const pagefindRaw = searchConfig[kPagefind];
  if (!isMapping(pagefindRaw)) {
    return undefined;
  }

  const result: SearchOptionsPagefind = {};

  const rootSelector = pagefindRaw[kRootSelector];
  if (typeof rootSelector === "string") {
    result[kRootSelector] = rootSelector;
  }

  const excludeRaw = pagefindRaw[kExcludeSelectors];
  if (Array.isArray(excludeRaw)) {
    // YAML happily yields numbers/null/objects here; keep real selectors only
    result[kExcludeSelectors] = excludeRaw.filter(
      (entry): entry is string =>
        typeof entry === "string" && entry.trim() !== "",
    );
  }

  const forceLanguage = pagefindRaw[kForceLanguage];
  if (typeof forceLanguage === "string") {
    result[kForceLanguage] = forceLanguage;
  }

  const rankingRaw = pagefindRaw[kRanking];
  if (isMapping(rankingRaw)) {
    const ranking: NonNullable<SearchOptionsPagefind[typeof kRanking]> = {};
    const rankingKeys = [
      kPageLength,
      kTermFrequency,
      kTermSaturation,
      kTermSimilarity,
    ] as const;
    for (const key of rankingKeys) {
      const weight = rankingRaw[key];
      // Rejects NaN / Infinity as well as non-numbers
      if (typeof weight === "number" && Number.isFinite(weight)) {
        ranking[key] = weight;
      }
    }
    result[kRanking] = ranking;
  }

  return result;
}
828+
568829export async function searchInputLocation (
569830 project : ProjectContext ,
570831) : Promise < SearchInputLocation > {
@@ -658,7 +919,9 @@ export async function websiteSearchDependency(
658919
659920 const scripts = [
660921 searchDependency ( "autocomplete.umd.js" ) ,
661- searchDependency ( "fuse.min.js" ) ,
922+ ...( options [ kEngine ] !== "pagefind"
923+ ? [ searchDependency ( "fuse.min.js" ) ]
924+ : [ ] ) ,
662925 searchDependency ( "quarto-search.js" ) ,
663926 ] ;
664927
0 commit comments