Skip to content

Commit e789132

Browse files
committed
pdf-server: import saved form field values from PDF as baseline
getFieldObjects() returns the PDF's stored form values, but we were only reading field IDs/pages from it. After saving a filled form and reopening it, the panel showed nothing and there was no way to see what had been filled in.
- New pdfBaselineFormValues map, populated in buildFieldNameMap() from each field's .value (skipping empty/Off/button values). Seeds formFieldValues so the panel shows PDF-stored values on open.
- computeDiff takes an optional baselineFormFields param and only includes values that differ — opening a filled PDF doesn't mark dirty, editing a field does, and reverting to the PDF's value marks clean again.
- importFieldValue() normalises radio-group value lookup (parent entry has value=undefined, children have the real export value), checkbox → true, listbox array → joined string.
1 parent bd4086a commit e789132

3 files changed

Lines changed: 77 additions & 1 deletion

File tree

examples/pdf-server/src/mcp-app.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ const imageCache = new Map<string, HTMLImageElement>();
8383

8484
/** Annotations imported from the PDF file (baseline for diff computation). */
8585
let pdfBaselineAnnotations: PdfAnnotationDef[] = [];
86+
/** Form field values stored in the PDF file itself (baseline for diff computation). */
87+
const pdfBaselineFormValues = new Map<string, string | boolean>();
8688

8789
// Dirty flag — tracks unsaved local changes
8890
let isDirty = false;
@@ -3161,6 +3163,7 @@ function persistAnnotations(): void {
31613163
pdfBaselineAnnotations,
31623164
currentAnnotations,
31633165
formFieldValues,
3166+
pdfBaselineFormValues,
31643167
);
31653168

31663169
// Dirty tracks whether there are unsaved changes. Undoing back to baseline
@@ -3222,6 +3225,25 @@ function restoreAnnotations(): void {
32223225
// PDF.js Form Field Name → ID Mapping
32233226
// =============================================================================
32243227

3228+
/**
 * Extract a meaningful value from a getFieldObjects() field array.
 *
 * One field name can map to several entries (for radio groups,
 * getFieldObjects returns a parent entry with value=undefined plus child
 * entries), so the first entry that carries a filled-in value is used.
 * Entries whose value is an "unfilled" marker ("", "Off", or an empty
 * selection array) are skipped so they cannot shadow a later filled entry.
 *
 * @param fieldArr - Entries reported by getFieldObjects() for one field name.
 * @returns `true` for a checked checkbox; the value as a string for other
 *   fields (array values are joined with ", "); or `null` for
 *   empty/unfilled/button fields so they don't clutter the panel or count
 *   as baseline edits.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function importFieldValue(fieldArr: any[]): string | boolean | null {
  // A value counts as filled unless it is missing, "", "Off", or an empty array.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const isFilled = (value: any): boolean =>
    value != null &&
    value !== "" &&
    value !== "Off" &&
    !(Array.isArray(value) && value.length === 0);

  // First entry with a real value; null/undefined entries are tolerated.
  const f = fieldArr.find((x) => x != null && isFilled(x.value));
  if (!f || f.type === "button") return null;

  // Any non-"Off" checkbox value means "checked"; the export value itself
  // is not surfaced.
  if (f.type === "checkbox") return true;

  const v = f.value;
  if (f.type === "radiobutton") return String(v);
  if (Array.isArray(v)) {
    // Multi-select values are flattened to one display string. A selection
    // made only of empty strings still counts as unfilled.
    const joined = v.join(", ");
    return joined === "" ? null : joined;
  }
  return String(v);
}
3246+
32253247
/** Build mapping from field names (used by fill_form) to annotation IDs (used by annotationStorage). */
32263248
async function buildFieldNameMap(
32273249
doc: pdfjsLib.PDFDocumentProxy,
@@ -3231,6 +3253,7 @@ async function buildFieldNameMap(
32313253
fieldNameToLabel.clear();
32323254
fieldNameToOrder.clear();
32333255
cachedFieldObjects = null;
3256+
pdfBaselineFormValues.clear();
32343257
try {
32353258
const fieldObjects = await doc.getFieldObjects();
32363259
cachedFieldObjects = fieldObjects as Record<string, any[]> | null;
@@ -3247,6 +3270,16 @@ async function buildFieldNameMap(
32473270
if (firstField && typeof firstField.page === "number") {
32483271
fieldNameToPage.set(name, firstField.page + 1);
32493272
}
3273+
// Import baseline value (already saved in the PDF). Skip button-type
3274+
// fields and empty/Off values that represent "unfilled".
3275+
const v = importFieldValue(fieldArr);
3276+
if (v !== null) {
3277+
pdfBaselineFormValues.set(name, v);
3278+
// Seed current state from baseline so the panel shows it. A
3279+
// restored localStorage diff (applied later in restoreAnnotations)
3280+
// will overwrite specific fields the user changed.
3281+
if (!formFieldValues.has(name)) formFieldValues.set(name, v);
3282+
}
32503283
}
32513284
}
32523285
} catch {
@@ -4333,6 +4366,7 @@ async function reloadPdf(): Promise<void> {
43334366
undoStack.length = 0;
43344367
redoStack.length = 0;
43354368
pdfBaselineAnnotations = [];
4369+
pdfBaselineFormValues.clear();
43364370
pageTextCache.clear();
43374371
pageTextItemsCache.clear();
43384372
allMatches = [];

examples/pdf-server/src/pdf-annotations.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,37 @@ describe("computeDiff", () => {
278278
expect(diff.formFields).toEqual({ name: "Alice", agree: true });
279279
});
280280

281+
it("omits form fields matching baseline", () => {
282+
const baseline = new Map<string, string | boolean>([
283+
["name", "Alice"],
284+
["agree", true],
285+
]);
286+
const fields = new Map<string, string | boolean>([
287+
["name", "Alice"], // unchanged
288+
["agree", false], // changed
289+
["email", "a@b"], // new
290+
]);
291+
const diff = computeDiff([], [], fields, baseline);
292+
expect(diff.formFields).toEqual({ agree: false, email: "a@b" });
293+
});
294+
295+
it("records fields cleared from baseline", () => {
296+
const baseline = new Map<string, string | boolean>([["name", "Alice"]]);
297+
const fields = new Map<string, string | boolean>(); // cleared
298+
const diff = computeDiff([], [], fields, baseline);
299+
expect(diff.formFields).toEqual({ name: "" });
300+
});
301+
302+
it("produces empty diff when all form values match baseline", () => {
303+
const baseline = new Map<string, string | boolean>([
304+
["name", "Alice"],
305+
["agree", true],
306+
]);
307+
const diff = computeDiff([], [], new Map(baseline), baseline);
308+
expect(diff.formFields).toEqual({});
309+
expect(isDiffEmpty(diff)).toBe(true);
310+
});
311+
281312
it("round-trips through mergeAnnotations", () => {
282313
const userStamp: PdfAnnotationDef = {
283314
type: "stamp",

examples/pdf-server/src/pdf-annotations.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ export function computeDiff(
282282
pdfAnnotations: PdfAnnotationDef[],
283283
currentAnnotations: PdfAnnotationDef[],
284284
formFields: Map<string, string | boolean>,
285+
baselineFormFields?: Map<string, string | boolean>,
285286
): AnnotationDiff {
286287
const pdfIds = new Set(pdfAnnotations.map((a) => a.id));
287288
const currentIds = new Set(currentAnnotations.map((a) => a.id));
@@ -294,11 +295,21 @@ export function computeDiff(
294295
.filter((a) => !currentIds.has(a.id))
295296
.map((a) => a.id);
296297

297-
// Form fields
298+
// Form fields: only values that differ from what's already in the PDF.
299+
// Without a baseline, every filled field is a user edit (back-compat).
298300
const formFieldsObj: Record<string, string | boolean> = {};
299301
for (const [k, v] of formFields) {
302+
if (baselineFormFields?.get(k) === v) continue;
300303
formFieldsObj[k] = v;
301304
}
305+
// Fields present in baseline but cleared in current are also a change
306+
if (baselineFormFields) {
307+
for (const [k, v] of baselineFormFields) {
308+
if (!formFields.has(k) && v !== "" && v !== false) {
309+
formFieldsObj[k] = formFields.get(k) ?? "";
310+
}
311+
}
312+
}
302313

303314
return { added, removed, formFields: formFieldsObj };
304315
}

0 commit comments

Comments
 (0)