Skip to content

Commit cb1832c

Browse files
committed
websites - add pagefind.app support
1 parent a976a5c commit cb1832c

25 files changed

Lines changed: 1342 additions & 31 deletions

File tree

.claude/CLAUDE.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ Tests live in `tests/` and require R, Python, and Julia. See `.claude/rules/test
7575
```bash
7676
cd tests
7777
./run-tests.sh smoke/render/render.test.ts # Linux/macOS
78+
# Consider using run-fast-tests.sh for tests that don't need a configured execution environment (Jupyter, knitr, etc.)
79+
# ./run-fast-tests.sh smoke/render/render.test.ts
7880
.\run-tests.ps1 smoke/render/render.test.ts # Windows
7981
```
8082

@@ -244,6 +246,48 @@ LaTeX error pattern maintenance is documented in [dev-docs/tinytex-pattern-maint
244246
- Internals guides
245247
- Performance monitoring
246248

249+
## Plans
250+
251+
We use plans for additional context and bookkeeping. Write plans to `.claude/plans/YYYY-MM-DD-<description>.md`.
252+
253+
### File Structure
254+
Plan files should include:
255+
256+
1. **Overview**: Brief description of the plan's goals and context
257+
2. **Checklist**: A markdown checklist of all work items using `- [ ]` syntax
258+
3. **Details**: Additional context, design decisions, or implementation notes as needed
259+
260+
### Maintaining Progress
261+
As you work through a plan:
262+
263+
1. **Update the plan file** after completing each work item
264+
2. **Check off items** by changing `- [ ]` to `- [x]`
265+
3. **Keep the plan file current** - it serves as both a roadmap and progress tracker
266+
4. **Add new items** if you discover additional work during implementation
267+
268+
### Excerpt from a simple Plan File
269+
270+
```markdown
271+
...
272+
273+
## Work Items
274+
275+
- [x] Review current runtime service implementations
276+
- [x] Identify common patterns
277+
- [ ] Update StandalonePlatform to use shared base
278+
- [ ] Update tests
279+
- [ ] Update documentation
280+
```
281+
282+
### When to Use Plan Files
283+
284+
Create plan files for:
285+
- Multi-step features spanning multiple packages
286+
- Complex refactoring that requires coordination
287+
- Tasks where tracking progress helps ensure nothing is missed
288+
289+
Complex plans can be divided into phases; in that case, split the work items into multiple checklists, one per phase. Simple plans should have a single checklist.
290+
247291
## Contributing
248292

249293
See CONTRIBUTING.md for pull request guidelines. Significant changes require a signed contributor agreement (individual or corporate).

.claude/plans/2026-03-10-pagefind-search.md

Lines changed: 339 additions & 0 deletions
Large diffs are not rendered by default.

src/import_map.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757

5858
"puppeteer": "https://deno.land/x/puppeteer@9.0.2/mod.ts",
5959

60+
"pagefind": "npm:/pagefind@1.3.0",
6061
"pdfjs-dist": "npm:pdfjs-dist@4.9.155",
6162

6263
"https://deno.land/std@0.196.0/console/unicode_width.ts": "https://deno.land/std@0.224.0/console/unicode_width.ts",

src/project/types/website/website-search.ts

Lines changed: 266 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import { existsSync } from "../../../deno_ral/fs.ts";
88
import { basename, join, relative } from "../../../deno_ral/path.ts";
9+
import * as pagefind from "pagefind";
910

1011
// currently not building the index here so not using fuse
1112
// @ deno-types="fuse/dist/fuse.d.ts"
@@ -90,22 +91,52 @@ const kLimit = "limit";
9091
// Whether to show the parent in the search results
9192
const kShowItemContext = "show-item-context";
9293

93-
// Any aloglia configuration
94+
// Any algolia configuration
9495
const kAlgolia = "algolia";
9596

97+
// The search engine backend
98+
const kEngine = "engine";
99+
100+
// Pagefind-specific configuration
101+
const kPagefind = "pagefind";
102+
const kRootSelector = "root-selector";
103+
const kExcludeSelectors = "exclude-selectors";
104+
const kForceLanguage = "force-language";
105+
const kRanking = "ranking";
106+
const kPageLength = "page-length";
107+
const kTermFrequency = "term-frequency";
108+
const kTermSaturation = "term-saturation";
109+
const kTermSimilarity = "term-similarity";
110+
111+
export type SearchEngine = "fuse" | "pagefind" | "algolia";
112+
96113
interface SearchOptions {
97114
[kLocation]: SearchInputLocation;
98115
[kCopyButton]: boolean;
99116
[kCollapseAfter]: boolean | number;
100117
[kType]: "textbox" | "overlay";
101118
[kPanelPlacement]: "start" | "end" | "full-width" | "input-wrapper-width";
102119
[kLimit]?: number;
120+
[kEngine]?: SearchEngine;
103121
[kAlgolia]?: SearchOptionsAlgolia;
122+
[kPagefind]?: SearchOptionsPagefind;
104123
[kLanguageDefaults]?: FormatLanguage;
105124
[kKbShortcutSearch]?: string[];
106125
[kShowItemContext]?: boolean | "parent" | "root" | "tree";
107126
}
108127

128+
/**
 * Pagefind-specific search settings, read from the `pagefind` key of the
 * website search configuration. Every field is optional.
 */
interface SearchOptionsPagefind {
  /**
   * CSS selector of the element whose content is indexed. The indexing step
   * falls back to `"main"` when this is not set.
   */
  [kRootSelector]?: string;

  /**
   * Additional CSS selectors to leave out of the index. The indexing step
   * appends these to its built-in exclusion list.
   */
  [kExcludeSelectors]?: string[];

  /** Language passed to the indexer as `forceLanguage` when set. */
  [kForceLanguage]?: string;

  /**
   * Numeric ranking settings.
   * NOTE(review): these are parsed from the configuration but are not read
   * by the indexing code in this section - presumably consumed elsewhere
   * (e.g. at search time); confirm.
   */
  [kRanking]?: {
    [kPageLength]?: number;
    [kTermFrequency]?: number;
    [kTermSaturation]?: number;
    [kTermSimilarity]?: number;
  };
}
139+
109140
const kSearchOnlyApiKey = "search-only-api-key";
110141
const kSearchApplicationId = "application-id";
111142
const kSearchParams = "params";
@@ -425,6 +456,172 @@ export async function updateSearchIndex(
425456
}
426457
}
427458

459+
/**
 * Builds the Pagefind search bundle for a rendered website project.
 *
 * The rendered HTML is first annotated (breadcrumb metadata and search
 * exclusions). A Pagefind index is then created from the project's `pagefind`
 * search options, the whole output directory is indexed, and the bundle is
 * written to `<output-dir>/pagefind`.
 *
 * Errors reported by Pagefind are surfaced as warnings rather than thrown.
 * Once index creation has been attempted, the Pagefind backend is always
 * closed - including when no index is returned or a later step throws.
 *
 * @param context Project whose output directory is indexed.
 * @param outputFiles Rendered output files, used by the HTML annotation step.
 */
export async function runPagefindIndex(
  context: ProjectContext,
  outputFiles: ProjectOutputFile[],
) {
  const outputDir = projectOutputDir(context);

  // Get pagefind-specific options
  const options = await searchOptions(context);
  const pagefindOpts = options?.[kPagefind];

  // Annotate HTML files with breadcrumbs and search exclusions before indexing
  await annotateHtmlForPagefind(context, outputFiles);

  // Build the createIndex config
  const rootSelector = pagefindOpts?.[kRootSelector] ?? "main";
  const defaultExcludeSelectors = [
    "nav[role='doc-toc']",
    "#title-block-header",
    "script",
    "style",
    ".sidebar",
    ".quarto-title-block",
  ];
  const userExcludeSelectors = pagefindOpts?.[kExcludeSelectors] ?? [];
  const excludeSelectors = [
    ...defaultExcludeSelectors,
    ...userExcludeSelectors,
  ];

  const indexConfig: Record<string, unknown> = {
    rootSelector,
    excludeSelectors,
  };
  if (pagefindOpts?.[kForceLanguage]) {
    indexConfig.forceLanguage = pagefindOpts[kForceLanguage];
  }

  try {
    // Create index
    const { index, errors: createErrors } = await pagefind.createIndex(
      indexConfig,
    );
    if (createErrors.length > 0) {
      warning("Pagefind index creation warnings: " + createErrors.join(", "));
    }
    if (!index) {
      warning("Pagefind failed to create index");
      return;
    }

    // Index the output directory
    const { errors: addErrors } = await index.addDirectory({
      path: outputDir,
    });
    if (addErrors.length > 0) {
      warning("Pagefind indexing warnings: " + addErrors.join(", "));
    }

    // Write the pagefind bundle to the output directory
    const pagefindOutputPath = join(outputDir, "pagefind");
    const { errors: writeErrors } = await index.writeFiles({
      outputPath: pagefindOutputPath,
    });
    if (writeErrors.length > 0) {
      warning(
        "Pagefind write warnings: " + writeErrors.join(", "),
      );
    }
  } finally {
    // Always shut the backend down; previously the early return above and any
    // thrown error skipped this call.
    await pagefind.close();
  }
}
531+
532+
/**
 * Rewrites rendered HTML files in place so they carry the hints used when
 * indexing with Pagefind.
 *
 * For each HTML output file:
 * - if the page opts out of search (`search: false`) or is a draft that is
 *   not visible in the current draft mode, `<body>` gets
 *   `data-pagefind-ignore="all"`;
 * - if the page belongs to a sidebar and has breadcrumbs, a `<meta>` element
 *   with `data-pagefind-meta="crumbs:<a>||<b>||..."` is inserted as the first
 *   child of `<main>`.
 *
 * Files are only written back when something was changed. File reads and
 * writes are synchronous.
 *
 * @param context Project the output files belong to.
 * @param outputFiles Rendered output files; non-HTML outputs are skipped.
 */
async function annotateHtmlForPagefind(
  context: ProjectContext,
  outputFiles: ProjectOutputFile[],
) {
  const siteDir = projectOutputDir(context);
  const draftMode = projectDraftMode(context);
  // Matches a doctype only at the very start of the file
  const doctypePattern = /^<!DOCTYPE[^>]*>/i;

  for (const outputFile of outputFiles) {
    // Only HTML output can be annotated
    if (!isHtmlFileOutput(outputFile.format.pandoc)) {
      continue;
    }

    const htmlPath = outputFile.file;
    const relPath = relative(siteDir, htmlPath);

    // Decide whether the page is excluded from search (same logic as
    // updateSearchIndex)
    const inputTarget = await resolveInputTargetForOutputFile(
      context,
      relPath,
    );
    const isDraft = inputTarget ? inputTarget.draft : false;
    const excluded = outputFile.format.metadata[kSearch] === false ||
      (isDraft === true && !isDraftVisible(draftMode));

    // Parse the rendered page
    const originalHtml = Deno.readTextFileSync(htmlPath);
    const doc = new DOMParser().parseFromString(originalHtml, "text/html");
    if (!doc) continue;

    let changed = false;

    // Excluded pages: mark the whole body as ignored
    if (excluded) {
      const body = doc.querySelector("body");
      if (body) {
        (body as Element).setAttribute("data-pagefind-ignore", "all");
        changed = true;
      }
    }

    // Breadcrumbs (same logic as updateSearchIndex)
    const navHref = `/${pathWithForwardSlashes(relPath)}`;
    const sidebar = sidebarForHref(navHref, outputFile.format);
    if (sidebar) {
      const crumbTexts = breadCrumbs(navHref, sidebar)
        .filter((crumb) => crumb.text !== undefined)
        .map((crumb) => crumb.text as string);

      // Prepend the owning navbar item unless merging is switched off
      // deno-lint-ignore no-explicit-any
      const mergeNavbarCrumbs = (outputFile.format.metadata as any)
        ?.website?.search?.["merge-navbar-crumbs"];
      if (mergeNavbarCrumbs !== false && crumbTexts.length > 0) {
        const navItem = navbarItemForSidebar(sidebar, outputFile.format);
        if (navItem && typeof navItem === "object") {
          const parentText = (navItem as NavigationItemObject).text;
          if (parentText && crumbTexts[0] !== parentText) {
            crumbTexts.unshift(parentText);
          }
        }
      }

      if (crumbTexts.length > 0) {
        const mainEl = doc.querySelector("main");
        if (mainEl) {
          const crumbMeta = doc.createElement("meta");
          (crumbMeta as Element).setAttribute(
            "data-pagefind-meta",
            `crumbs:${crumbTexts.join("||")}`,
          );
          mainEl.insertBefore(crumbMeta, mainEl.firstChild);
          changed = true;
        }
      }
    }

    if (!changed) {
      continue;
    }

    // Serialize back to HTML, preserving the original doctype
    const doctype = originalHtml.match(doctypePattern)?.[0] ?? "";
    const markup = doc.documentElement?.outerHTML ?? "";
    Deno.writeTextFileSync(htmlPath, doctype + "\n" + markup);
  }
}
624+
428625
const kDefaultCollapse = 3;
429626

430627
export async function searchOptions(
@@ -444,14 +641,23 @@ export async function searchOptions(
444641
? kDefaultCollapse
445642
: false;
446643

644+
// Determine the search engine
645+
const algolia = algoliaOptions(searchMetadata, project);
646+
const engine = searchEngine(searchMetadata, algolia);
647+
const pagefindOpts = engine === "pagefind"
648+
? pagefindOptions(searchMetadata)
649+
: undefined;
650+
447651
return {
448652
[kLocation]: location,
449653
[kCopyButton]: searchMetadata[kCopyButton] === true,
450654
[kCollapseAfter]: collapseMatches,
451655
[kPanelPlacement]: location === "navbar" ? "end" : "start",
452656
[kType]: searchType(searchMetadata[kType], location),
453657
[kLimit]: searchInputLimit(searchMetadata),
454-
[kAlgolia]: algoliaOptions(searchMetadata, project),
658+
[kEngine]: engine,
659+
[kAlgolia]: algolia,
660+
[kPagefind]: pagefindOpts,
455661
[kKbShortcutSearch]: searchKbdShortcut(searchMetadata),
456662
[kShowItemContext]: searchShowItemContext(searchMetadata),
457663
};
@@ -565,6 +771,61 @@ function algoliaOptions(
565771
}
566772
}
567773

774+
/**
 * Resolves which search backend a site uses.
 *
 * An explicit `engine` value wins when it is one of the supported names.
 * Otherwise the presence of algolia options selects "algolia" (backward
 * compatibility), and "fuse" is the final fallback. Unsupported or
 * non-string `engine` values are ignored.
 *
 * @param searchConfig Raw search configuration.
 * @param algolia Resolved algolia options, if any.
 * @returns The search engine to use.
 */
function searchEngine(
  searchConfig: Record<string, unknown>,
  algolia: SearchOptionsAlgolia | undefined,
): SearchEngine {
  const supported: readonly SearchEngine[] = ["pagefind", "algolia", "fuse"];
  const requested = searchConfig[kEngine];

  // Strict equality against string literals also rules out non-string values
  const explicit = supported.find((name) => name === requested);
  if (explicit !== undefined) {
    return explicit;
  }

  // Auto-detect algolia when algolia config is present (backward compat)
  return algolia ? "algolia" : "fuse";
}
790+
791+
/**
 * Extracts and validates the Pagefind options from the raw search
 * configuration.
 *
 * Only well-typed values are copied: string selectors and language, string
 * entries of the exclude list, and finite numeric ranking settings. Anything
 * else is dropped rather than passed on.
 *
 * @param searchConfig Raw search configuration.
 * @returns The validated options, or `undefined` when the `pagefind` key is
 *   missing or is not an object.
 */
function pagefindOptions(
  searchConfig: Record<string, unknown>,
): SearchOptionsPagefind | undefined {
  const pagefindRaw = searchConfig[kPagefind];
  if (!pagefindRaw || typeof pagefindRaw !== "object") {
    return undefined;
  }

  const pagefindObj = pagefindRaw as Record<string, unknown>;
  const result: SearchOptionsPagefind = {};

  const rootSelector = pagefindObj[kRootSelector];
  if (typeof rootSelector === "string") {
    result[kRootSelector] = rootSelector;
  }

  const excludeSelectors = pagefindObj[kExcludeSelectors];
  if (Array.isArray(excludeSelectors)) {
    // Keep only real strings; a blind cast would let numbers, nulls or nested
    // values through as if they were selectors
    result[kExcludeSelectors] = excludeSelectors.filter(
      (selector): selector is string => typeof selector === "string",
    );
  }

  const forceLanguage = pagefindObj[kForceLanguage];
  if (typeof forceLanguage === "string") {
    result[kForceLanguage] = forceLanguage;
  }

  const rankingRaw = pagefindObj[kRanking];
  if (rankingRaw && typeof rankingRaw === "object") {
    const rankingObj = rankingRaw as Record<string, unknown>;
    const rankingKeys = [
      kPageLength,
      kTermFrequency,
      kTermSaturation,
      kTermSimilarity,
    ] as const;
    const ranking: NonNullable<SearchOptionsPagefind[typeof kRanking]> = {};
    for (const key of rankingKeys) {
      const value = rankingObj[key];
      // typeof alone would also accept NaN and +/-Infinity
      if (typeof value === "number" && Number.isFinite(value)) {
        ranking[key] = value;
      }
    }
    result[kRanking] = ranking;
  }

  return result;
}
828+
568829
export async function searchInputLocation(
569830
project: ProjectContext,
570831
): Promise<SearchInputLocation> {
@@ -658,7 +919,9 @@ export async function websiteSearchDependency(
658919

659920
const scripts = [
660921
searchDependency("autocomplete.umd.js"),
661-
searchDependency("fuse.min.js"),
922+
...(options[kEngine] !== "pagefind"
923+
? [searchDependency("fuse.min.js")]
924+
: []),
662925
searchDependency("quarto-search.js"),
663926
];
664927

0 commit comments

Comments
 (0)