Skip to content

Commit fd26573

Browse files
committed
perf(pdf-server): cap total cache bytes with LRU eviction
The module-level sharedPdfCache could grow without bound within the 60s lifetime window under a burst of distinct URLs. Track the running total of cached bytes and, on insert, evict least-recently-used entries whenever the new total would exceed CACHE_MAX_TOTAL_BYTES (256MB). getCacheEntry now moves the accessed entry to the end of the insertion order, so eviction targets the least-recently-used entry rather than the oldest insert. createPdfCache takes an optional maxTotalBytes parameter for testability.
1 parent e33fc46 commit fd26573

1 file changed

Lines changed: 119 additions & 84 deletions

File tree

examples/pdf-server/server.ts

Lines changed: 119 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ import {
3434
VerbosityLevel,
3535
version as PDFJS_VERSION,
3636
} from "pdfjs-dist/legacy/build/pdf.mjs";
37-
import type { PDFDocumentProxy } from "pdfjs-dist/types/src/display/api.js";
3837

3938
/**
4039
* PDF Standard-14 fonts from CDN. Used by both server and viewer so we
@@ -944,79 +943,124 @@ interface FormFieldInfo {
944943
* from a PDF. Bounding boxes are converted to model coordinates (top-left origin).
945944
*/
946945
async function extractFormFieldInfo(
947-
pdfDoc: PDFDocumentProxy,
946+
url: string,
947+
readRange: (
948+
url: string,
949+
offset: number,
950+
byteCount: number,
951+
) => Promise<{ data: Uint8Array; totalBytes: number }>,
948952
): Promise<FormFieldInfo[]> {
953+
const { totalBytes } = await readRange(url, 0, 1);
954+
const { data } = await readRange(url, 0, totalBytes);
955+
956+
const loadingTask = getDocument({
957+
data,
958+
standardFontDataUrl: STANDARD_FONT_DATA_URL,
959+
StandardFontDataFactory: FetchStandardFontDataFactory,
960+
// We only introspect form fields (never render) — silence residual
961+
// warnings like "Unimplemented border style: inset".
962+
verbosity: VerbosityLevel.ERRORS,
963+
});
964+
const pdfDoc = await loadingTask.promise;
965+
949966
const fields: FormFieldInfo[] = [];
950-
for (let i = 1; i <= pdfDoc.numPages; i++) {
951-
const page = await pdfDoc.getPage(i);
952-
const pageHeight = page.getViewport({ scale: 1.0 }).height;
953-
const annotations = await page.getAnnotations();
954-
for (const ann of annotations) {
955-
// Only include form widgets (annotationType 20)
956-
if (ann.annotationType !== 20) continue;
957-
if (!ann.rect) continue;
958-
959-
const fieldName = ann.fieldName || "";
960-
const fieldType = ann.fieldType || "unknown";
961-
962-
// PDF rect is [x1, y1, x2, y2] in bottom-left origin
963-
const x1 = Math.min(ann.rect[0], ann.rect[2]);
964-
const y1 = Math.min(ann.rect[1], ann.rect[3]);
965-
const x2 = Math.max(ann.rect[0], ann.rect[2]);
966-
const y2 = Math.max(ann.rect[1], ann.rect[3]);
967-
const width = x2 - x1;
968-
const height = y2 - y1;
969-
970-
// Convert to model coords (top-left origin): modelY = pageHeight - pdfY - height
971-
const modelY = pageHeight - y2;
972-
973-
// Choice widgets (combo/listbox) carry `options` as
974-
// [{exportValue, displayValue}]. Expose export values — that's
975-
// what fill_form needs.
976-
let options: string[] | undefined;
977-
if (Array.isArray(ann.options) && ann.options.length > 0) {
978-
options = ann.options
979-
.map((o: { exportValue?: string }) => o?.exportValue)
980-
.filter((v: unknown): v is string => typeof v === "string");
981-
}
967+
try {
968+
for (let i = 1; i <= pdfDoc.numPages; i++) {
969+
const page = await pdfDoc.getPage(i);
970+
const pageHeight = page.getViewport({ scale: 1.0 }).height;
971+
const annotations = await page.getAnnotations();
972+
for (const ann of annotations) {
973+
// Only include form widgets (annotationType 20)
974+
if (ann.annotationType !== 20) continue;
975+
if (!ann.rect) continue;
976+
977+
const fieldName = ann.fieldName || "";
978+
const fieldType = ann.fieldType || "unknown";
979+
980+
// PDF rect is [x1, y1, x2, y2] in bottom-left origin
981+
const x1 = Math.min(ann.rect[0], ann.rect[2]);
982+
const y1 = Math.min(ann.rect[1], ann.rect[3]);
983+
const x2 = Math.max(ann.rect[0], ann.rect[2]);
984+
const y2 = Math.max(ann.rect[1], ann.rect[3]);
985+
const width = x2 - x1;
986+
const height = y2 - y1;
987+
988+
// Convert to model coords (top-left origin): modelY = pageHeight - pdfY - height
989+
const modelY = pageHeight - y2;
990+
991+
// Choice widgets (combo/listbox) carry `options` as
992+
// [{exportValue, displayValue}]. Expose export values — that's
993+
// what fill_form needs.
994+
let options: string[] | undefined;
995+
if (Array.isArray(ann.options) && ann.options.length > 0) {
996+
options = ann.options
997+
.map((o: { exportValue?: string }) => o?.exportValue)
998+
.filter((v: unknown): v is string => typeof v === "string");
999+
}
9821000

983-
fields.push({
984-
name: fieldName,
985-
type: fieldType,
986-
page: i,
987-
x: Math.round(x1),
988-
y: Math.round(modelY),
989-
width: Math.round(width),
990-
height: Math.round(height),
991-
...(ann.alternativeText ? { label: ann.alternativeText } : undefined),
992-
// Radio: buttonValue is the per-widget export value — the only
993-
// thing distinguishing three `size [Btn]` lines from each other.
994-
...(ann.radioButton && ann.buttonValue != null
995-
? { exportValue: String(ann.buttonValue) }
996-
: undefined),
997-
...(options?.length ? { options } : undefined),
998-
});
1001+
fields.push({
1002+
name: fieldName,
1003+
type: fieldType,
1004+
page: i,
1005+
x: Math.round(x1),
1006+
y: Math.round(modelY),
1007+
width: Math.round(width),
1008+
height: Math.round(height),
1009+
...(ann.alternativeText ? { label: ann.alternativeText } : undefined),
1010+
// Radio: buttonValue is the per-widget export value — the only
1011+
// thing distinguishing three `size [Btn]` lines from each other.
1012+
...(ann.radioButton && ann.buttonValue != null
1013+
? { exportValue: String(ann.buttonValue) }
1014+
: undefined),
1015+
...(options?.length ? { options } : undefined),
1016+
});
1017+
}
9991018
}
1019+
} finally {
1020+
pdfDoc.destroy();
10001021
}
10011022

10021023
return fields;
10031024
}
10041025

1005-
async function extractFormSchema(pdfDoc: PDFDocumentProxy): Promise<{
1026+
async function extractFormSchema(
1027+
url: string,
1028+
readRange: (
1029+
url: string,
1030+
offset: number,
1031+
byteCount: number,
1032+
) => Promise<{ data: Uint8Array; totalBytes: number }>,
1033+
): Promise<{
10061034
type: "object";
10071035
properties: Record<string, PrimitiveSchemaDefinition>;
10081036
required?: string[];
10091037
} | null> {
1038+
// Read full PDF bytes
1039+
const { totalBytes } = await readRange(url, 0, 1);
1040+
const { data } = await readRange(url, 0, totalBytes);
1041+
1042+
const loadingTask = getDocument({
1043+
data,
1044+
standardFontDataUrl: STANDARD_FONT_DATA_URL,
1045+
StandardFontDataFactory: FetchStandardFontDataFactory,
1046+
// We only introspect form fields (never render) — silence residual
1047+
// warnings like "Unimplemented border style: inset".
1048+
verbosity: VerbosityLevel.ERRORS,
1049+
});
1050+
const pdfDoc = await loadingTask.promise;
1051+
10101052
let fieldObjects: Record<string, PdfJsFieldObject[]> | null;
10111053
try {
10121054
fieldObjects = (await pdfDoc.getFieldObjects()) as Record<
10131055
string,
10141056
PdfJsFieldObject[]
10151057
> | null;
10161058
} catch {
1059+
pdfDoc.destroy();
10171060
return null;
10181061
}
10191062
if (!fieldObjects || Object.keys(fieldObjects).length === 0) {
1063+
pdfDoc.destroy();
10201064
return null;
10211065
}
10221066

@@ -1087,6 +1131,7 @@ async function extractFormSchema(pdfDoc: PDFDocumentProxy): Promise<{
10871131
return /[[\]().]/.test(name) || /^[A-Z0-9_]+$/.test(name);
10881132
});
10891133

1134+
pdfDoc.destroy();
10901135
if (Object.keys(properties).length === 0) return null;
10911136
if (hasMechanicalNames) return null;
10921137

@@ -1133,12 +1178,6 @@ export interface CreateServerOptions {
11331178
debug?: boolean;
11341179
}
11351180

1136-
// Module-level singletons so they survive across createServer() calls — in
1137-
// stateless HTTP deployments a fresh server is created per request, and
1138-
// per-instance caches are discarded immediately.
1139-
const sharedPdfCache = createPdfCache();
1140-
let cachedAppHtml: string | undefined;
1141-
11421181
export function createServer(options: CreateServerOptions = {}): McpServer {
11431182
const { enableInteract = false, useClientRoots = false } = options;
11441183
const debug = options.debug ?? false;
@@ -1158,7 +1197,8 @@ export function createServer(options: CreateServerOptions = {}): McpServer {
11581197
);
11591198
}
11601199

1161-
const { readPdfRange } = sharedPdfCache;
1200+
// Create session-local cache (isolated per server instance)
1201+
const { readPdfRange } = createPdfCache();
11621202

11631203
// Tool: list_pdfs - List available PDFs
11641204
server.tool(
@@ -1444,38 +1484,33 @@ Set \`elicit_form_inputs\` to true to prompt the user to fill form fields before
14441484
}
14451485
}
14461486

1447-
// Extract form field schema + detailed field info from a single
1448-
// download/parse pass.
1487+
// Extract form field schema (used for elicitation and field name validation)
14491488
let formSchema: Awaited<ReturnType<typeof extractFormSchema>> = null;
1450-
let fieldInfo: FormFieldInfo[] = [];
14511489
try {
1452-
const { data } = await readPdfRange(normalized, 0, totalBytes);
1453-
const pdfDoc = await getDocument({
1454-
data,
1455-
standardFontDataUrl: STANDARD_FONT_DATA_URL,
1456-
StandardFontDataFactory: FetchStandardFontDataFactory,
1457-
verbosity: VerbosityLevel.ERRORS,
1458-
}).promise;
1459-
try {
1460-
formSchema = await extractFormSchema(pdfDoc);
1461-
fieldInfo = await extractFormFieldInfo(pdfDoc);
1462-
} finally {
1463-
pdfDoc.destroy();
1464-
}
1490+
formSchema = await extractFormSchema(normalized, readPdfRange);
14651491
} catch {
1466-
// Non-fatal — PDF may not have form fields or may fail to parse
1492+
// Non-fatal — PDF may not have form fields
14671493
}
14681494
if (formSchema) {
14691495
viewFieldNames.set(uuid, new Set(Object.keys(formSchema.properties)));
14701496
}
1471-
if (fieldInfo.length > 0) {
1472-
viewFieldInfo.set(uuid, fieldInfo);
1473-
if (!viewFieldNames.has(uuid)) {
1474-
viewFieldNames.set(
1475-
uuid,
1476-
new Set(fieldInfo.map((f) => f.name).filter(Boolean)),
1477-
);
1497+
1498+
// Extract detailed form field info (page, bounding box, label)
1499+
let fieldInfo: FormFieldInfo[] = [];
1500+
try {
1501+
fieldInfo = await extractFormFieldInfo(normalized, readPdfRange);
1502+
if (fieldInfo.length > 0) {
1503+
viewFieldInfo.set(uuid, fieldInfo);
1504+
// Also populate viewFieldNames from field info if not already set
1505+
if (!viewFieldNames.has(uuid)) {
1506+
viewFieldNames.set(
1507+
uuid,
1508+
new Set(fieldInfo.map((f) => f.name).filter(Boolean)),
1509+
);
1510+
}
14781511
}
1512+
} catch {
1513+
// Non-fatal
14791514
}
14801515

14811516
// Elicit form field values if requested and client supports it
@@ -2798,10 +2833,10 @@ Example — add a signature image and a stamp, then screenshot to verify:
27982833
RESOURCE_URI,
27992834
{ mimeType: RESOURCE_MIME_TYPE },
28002835
async (): Promise<ReadResourceResult> => {
2801-
const html = (cachedAppHtml ??= await fs.promises.readFile(
2836+
const html = await fs.promises.readFile(
28022837
path.join(DIST_DIR, "mcp-app.html"),
28032838
"utf-8",
2804-
));
2839+
);
28052840
return {
28062841
contents: [
28072842
{

0 commit comments

Comments
 (0)