Skip to content

Commit e33fc46

Browse files
committed
perf(pdf-server): share cache across server instances and dedupe form parsing
In stateless HTTP deployments, createServer() is called once per request, so the per-instance pdfCache and the 4 MB viewer HTML were discarded after every call. Hoist both to module scope so they are shared across server instances. Also refactor extractFormSchema/extractFormFieldInfo to accept an already-parsed PDFDocumentProxy, so that display_pdf downloads and parses the PDF once instead of twice; the caller now owns the document and destroys it after both extractors have run.
1 parent 64b4fa1 commit e33fc46

1 file changed

Lines changed: 84 additions & 119 deletions

File tree

examples/pdf-server/server.ts

Lines changed: 84 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import {
3434
VerbosityLevel,
3535
version as PDFJS_VERSION,
3636
} from "pdfjs-dist/legacy/build/pdf.mjs";
37+
import type { PDFDocumentProxy } from "pdfjs-dist/types/src/display/api.js";
3738

3839
/**
3940
* PDF Standard-14 fonts from CDN. Used by both server and viewer so we
@@ -943,124 +944,79 @@ interface FormFieldInfo {
943944
* from a PDF. Bounding boxes are converted to model coordinates (top-left origin).
944945
*/
945946
async function extractFormFieldInfo(
946-
url: string,
947-
readRange: (
948-
url: string,
949-
offset: number,
950-
byteCount: number,
951-
) => Promise<{ data: Uint8Array; totalBytes: number }>,
947+
pdfDoc: PDFDocumentProxy,
952948
): Promise<FormFieldInfo[]> {
953-
const { totalBytes } = await readRange(url, 0, 1);
954-
const { data } = await readRange(url, 0, totalBytes);
955-
956-
const loadingTask = getDocument({
957-
data,
958-
standardFontDataUrl: STANDARD_FONT_DATA_URL,
959-
StandardFontDataFactory: FetchStandardFontDataFactory,
960-
// We only introspect form fields (never render) — silence residual
961-
// warnings like "Unimplemented border style: inset".
962-
verbosity: VerbosityLevel.ERRORS,
963-
});
964-
const pdfDoc = await loadingTask.promise;
965-
966949
const fields: FormFieldInfo[] = [];
967-
try {
968-
for (let i = 1; i <= pdfDoc.numPages; i++) {
969-
const page = await pdfDoc.getPage(i);
970-
const pageHeight = page.getViewport({ scale: 1.0 }).height;
971-
const annotations = await page.getAnnotations();
972-
for (const ann of annotations) {
973-
// Only include form widgets (annotationType 20)
974-
if (ann.annotationType !== 20) continue;
975-
if (!ann.rect) continue;
976-
977-
const fieldName = ann.fieldName || "";
978-
const fieldType = ann.fieldType || "unknown";
979-
980-
// PDF rect is [x1, y1, x2, y2] in bottom-left origin
981-
const x1 = Math.min(ann.rect[0], ann.rect[2]);
982-
const y1 = Math.min(ann.rect[1], ann.rect[3]);
983-
const x2 = Math.max(ann.rect[0], ann.rect[2]);
984-
const y2 = Math.max(ann.rect[1], ann.rect[3]);
985-
const width = x2 - x1;
986-
const height = y2 - y1;
987-
988-
// Convert to model coords (top-left origin): modelY = pageHeight - pdfY - height
989-
const modelY = pageHeight - y2;
990-
991-
// Choice widgets (combo/listbox) carry `options` as
992-
// [{exportValue, displayValue}]. Expose export values — that's
993-
// what fill_form needs.
994-
let options: string[] | undefined;
995-
if (Array.isArray(ann.options) && ann.options.length > 0) {
996-
options = ann.options
997-
.map((o: { exportValue?: string }) => o?.exportValue)
998-
.filter((v: unknown): v is string => typeof v === "string");
999-
}
1000-
1001-
fields.push({
1002-
name: fieldName,
1003-
type: fieldType,
1004-
page: i,
1005-
x: Math.round(x1),
1006-
y: Math.round(modelY),
1007-
width: Math.round(width),
1008-
height: Math.round(height),
1009-
...(ann.alternativeText ? { label: ann.alternativeText } : undefined),
1010-
// Radio: buttonValue is the per-widget export value — the only
1011-
// thing distinguishing three `size [Btn]` lines from each other.
1012-
...(ann.radioButton && ann.buttonValue != null
1013-
? { exportValue: String(ann.buttonValue) }
1014-
: undefined),
1015-
...(options?.length ? { options } : undefined),
1016-
});
950+
for (let i = 1; i <= pdfDoc.numPages; i++) {
951+
const page = await pdfDoc.getPage(i);
952+
const pageHeight = page.getViewport({ scale: 1.0 }).height;
953+
const annotations = await page.getAnnotations();
954+
for (const ann of annotations) {
955+
// Only include form widgets (annotationType 20)
956+
if (ann.annotationType !== 20) continue;
957+
if (!ann.rect) continue;
958+
959+
const fieldName = ann.fieldName || "";
960+
const fieldType = ann.fieldType || "unknown";
961+
962+
// PDF rect is [x1, y1, x2, y2] in bottom-left origin
963+
const x1 = Math.min(ann.rect[0], ann.rect[2]);
964+
const y1 = Math.min(ann.rect[1], ann.rect[3]);
965+
const x2 = Math.max(ann.rect[0], ann.rect[2]);
966+
const y2 = Math.max(ann.rect[1], ann.rect[3]);
967+
const width = x2 - x1;
968+
const height = y2 - y1;
969+
970+
// Convert to model coords (top-left origin): modelY = pageHeight - pdfY - height
971+
const modelY = pageHeight - y2;
972+
973+
// Choice widgets (combo/listbox) carry `options` as
974+
// [{exportValue, displayValue}]. Expose export values — that's
975+
// what fill_form needs.
976+
let options: string[] | undefined;
977+
if (Array.isArray(ann.options) && ann.options.length > 0) {
978+
options = ann.options
979+
.map((o: { exportValue?: string }) => o?.exportValue)
980+
.filter((v: unknown): v is string => typeof v === "string");
1017981
}
982+
983+
fields.push({
984+
name: fieldName,
985+
type: fieldType,
986+
page: i,
987+
x: Math.round(x1),
988+
y: Math.round(modelY),
989+
width: Math.round(width),
990+
height: Math.round(height),
991+
...(ann.alternativeText ? { label: ann.alternativeText } : undefined),
992+
// Radio: buttonValue is the per-widget export value — the only
993+
// thing distinguishing three `size [Btn]` lines from each other.
994+
...(ann.radioButton && ann.buttonValue != null
995+
? { exportValue: String(ann.buttonValue) }
996+
: undefined),
997+
...(options?.length ? { options } : undefined),
998+
});
1018999
}
1019-
} finally {
1020-
pdfDoc.destroy();
10211000
}
10221001

10231002
return fields;
10241003
}
10251004

1026-
async function extractFormSchema(
1027-
url: string,
1028-
readRange: (
1029-
url: string,
1030-
offset: number,
1031-
byteCount: number,
1032-
) => Promise<{ data: Uint8Array; totalBytes: number }>,
1033-
): Promise<{
1005+
async function extractFormSchema(pdfDoc: PDFDocumentProxy): Promise<{
10341006
type: "object";
10351007
properties: Record<string, PrimitiveSchemaDefinition>;
10361008
required?: string[];
10371009
} | null> {
1038-
// Read full PDF bytes
1039-
const { totalBytes } = await readRange(url, 0, 1);
1040-
const { data } = await readRange(url, 0, totalBytes);
1041-
1042-
const loadingTask = getDocument({
1043-
data,
1044-
standardFontDataUrl: STANDARD_FONT_DATA_URL,
1045-
StandardFontDataFactory: FetchStandardFontDataFactory,
1046-
// We only introspect form fields (never render) — silence residual
1047-
// warnings like "Unimplemented border style: inset".
1048-
verbosity: VerbosityLevel.ERRORS,
1049-
});
1050-
const pdfDoc = await loadingTask.promise;
1051-
10521010
let fieldObjects: Record<string, PdfJsFieldObject[]> | null;
10531011
try {
10541012
fieldObjects = (await pdfDoc.getFieldObjects()) as Record<
10551013
string,
10561014
PdfJsFieldObject[]
10571015
> | null;
10581016
} catch {
1059-
pdfDoc.destroy();
10601017
return null;
10611018
}
10621019
if (!fieldObjects || Object.keys(fieldObjects).length === 0) {
1063-
pdfDoc.destroy();
10641020
return null;
10651021
}
10661022

@@ -1131,7 +1087,6 @@ async function extractFormSchema(
11311087
return /[[\]().]/.test(name) || /^[A-Z0-9_]+$/.test(name);
11321088
});
11331089

1134-
pdfDoc.destroy();
11351090
if (Object.keys(properties).length === 0) return null;
11361091
if (hasMechanicalNames) return null;
11371092

@@ -1178,6 +1133,12 @@ export interface CreateServerOptions {
11781133
debug?: boolean;
11791134
}
11801135

1136+
// Module-level singletons so they survive across createServer() calls — in
1137+
// stateless HTTP deployments a fresh server is created per request, and
1138+
// per-instance caches are discarded immediately.
1139+
const sharedPdfCache = createPdfCache();
1140+
let cachedAppHtml: string | undefined;
1141+
11811142
export function createServer(options: CreateServerOptions = {}): McpServer {
11821143
const { enableInteract = false, useClientRoots = false } = options;
11831144
const debug = options.debug ?? false;
@@ -1197,8 +1158,7 @@ export function createServer(options: CreateServerOptions = {}): McpServer {
11971158
);
11981159
}
11991160

1200-
// Create session-local cache (isolated per server instance)
1201-
const { readPdfRange } = createPdfCache();
1161+
const { readPdfRange } = sharedPdfCache;
12021162

12031163
// Tool: list_pdfs - List available PDFs
12041164
server.tool(
@@ -1484,33 +1444,38 @@ Set \`elicit_form_inputs\` to true to prompt the user to fill form fields before
14841444
}
14851445
}
14861446

1487-
// Extract form field schema (used for elicitation and field name validation)
1447+
// Extract form field schema + detailed field info from a single
1448+
// download/parse pass.
14881449
let formSchema: Awaited<ReturnType<typeof extractFormSchema>> = null;
1450+
let fieldInfo: FormFieldInfo[] = [];
14891451
try {
1490-
formSchema = await extractFormSchema(normalized, readPdfRange);
1452+
const { data } = await readPdfRange(normalized, 0, totalBytes);
1453+
const pdfDoc = await getDocument({
1454+
data,
1455+
standardFontDataUrl: STANDARD_FONT_DATA_URL,
1456+
StandardFontDataFactory: FetchStandardFontDataFactory,
1457+
verbosity: VerbosityLevel.ERRORS,
1458+
}).promise;
1459+
try {
1460+
formSchema = await extractFormSchema(pdfDoc);
1461+
fieldInfo = await extractFormFieldInfo(pdfDoc);
1462+
} finally {
1463+
pdfDoc.destroy();
1464+
}
14911465
} catch {
1492-
// Non-fatal — PDF may not have form fields
1466+
// Non-fatal — PDF may not have form fields or may fail to parse
14931467
}
14941468
if (formSchema) {
14951469
viewFieldNames.set(uuid, new Set(Object.keys(formSchema.properties)));
14961470
}
1497-
1498-
// Extract detailed form field info (page, bounding box, label)
1499-
let fieldInfo: FormFieldInfo[] = [];
1500-
try {
1501-
fieldInfo = await extractFormFieldInfo(normalized, readPdfRange);
1502-
if (fieldInfo.length > 0) {
1503-
viewFieldInfo.set(uuid, fieldInfo);
1504-
// Also populate viewFieldNames from field info if not already set
1505-
if (!viewFieldNames.has(uuid)) {
1506-
viewFieldNames.set(
1507-
uuid,
1508-
new Set(fieldInfo.map((f) => f.name).filter(Boolean)),
1509-
);
1510-
}
1471+
if (fieldInfo.length > 0) {
1472+
viewFieldInfo.set(uuid, fieldInfo);
1473+
if (!viewFieldNames.has(uuid)) {
1474+
viewFieldNames.set(
1475+
uuid,
1476+
new Set(fieldInfo.map((f) => f.name).filter(Boolean)),
1477+
);
15111478
}
1512-
} catch {
1513-
// Non-fatal
15141479
}
15151480

15161481
// Elicit form field values if requested and client supports it
@@ -2833,10 +2798,10 @@ Example — add a signature image and a stamp, then screenshot to verify:
28332798
RESOURCE_URI,
28342799
{ mimeType: RESOURCE_MIME_TYPE },
28352800
async (): Promise<ReadResourceResult> => {
2836-
const html = await fs.promises.readFile(
2801+
const html = (cachedAppHtml ??= await fs.promises.readFile(
28372802
path.join(DIST_DIR, "mcp-app.html"),
28382803
"utf-8",
2839-
);
2804+
));
28402805
return {
28412806
contents: [
28422807
{

0 commit comments

Comments
 (0)