diff --git a/fpdfsdk/fpdf_doc.cpp b/fpdfsdk/fpdf_doc.cpp index 799c5a592..c9f84a07b 100644 --- a/fpdfsdk/fpdf_doc.cpp +++ b/fpdfsdk/fpdf_doc.cpp @@ -773,4 +773,63 @@ EPDF_GetMetaKeyName(FPDF_DOCUMENT document, } } return 0; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveXMPMetadata(FPDF_DOCUMENT document) { + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pDoc) + return false; + + RetainPtr root = pDoc->GetMutableRoot(); + if (!root) + return false; + + // /Metadata is the catalog-level XMP stream (ISO 32000 §14.3.2). It is stored + // separately from /Info, so clearing Info via EPDF_SetMetaText() does not + // touch it. Removing the key drops the XMP from the document. + root->RemoveFor("Metadata"); + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveEmbeddedThumbnails(FPDF_DOCUMENT document) { + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pDoc) + return false; + + const int count = pDoc->GetPageCount(); + for (int i = 0; i < count; ++i) { + RetainPtr page = pDoc->GetMutablePageDictionary(i); + if (page) + page->RemoveFor("Thumb"); + } + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveAllJavaScript(FPDF_DOCUMENT document) { + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pDoc) + return false; + + RetainPtr root = pDoc->GetMutableRoot(); + if (!root) + return false; + + // (1) Catalog /Names /JavaScript name tree (document-level scripts). + RetainPtr names = root->GetMutableDictFor("Names"); + if (names) + names->RemoveFor("JavaScript"); + + // (2) /OpenAction, but only when it is a JavaScript action — a GoTo + // destination OpenAction is legitimate navigation and is left intact. + RetainPtr open_action = root->GetDictFor("OpenAction"); + if (open_action && open_action->GetNameFor("S") == "JavaScript") + root->RemoveFor("OpenAction"); + + // (3) Catalog-level /AA additional-actions (e.g. WillClose/WillPrint scripts). + root->RemoveFor("AA"); + + return true; } \ No newline at end of file diff --git a/fpdfsdk/fpdf_editpage.cpp b/fpdfsdk/fpdf_editpage.cpp index 06ed956c3..464ef4da6 100644 --- a/fpdfsdk/fpdf_editpage.cpp +++ b/fpdfsdk/fpdf_editpage.cpp @@ -5,6 +5,7 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "public/fpdf_edit.h" +#include "public/fpdfview.h" #include #include @@ -18,6 +19,7 @@ #include "core/fpdfapi/page/cpdf_form.h" #include "core/fpdfapi/page/cpdf_formobject.h" #include "core/fpdfapi/page/cpdf_imageobject.h" +#include "core/fpdfapi/page/cpdf_occontext.h" #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_pageimagecache.h" #include "core/fpdfapi/page/cpdf_pageobject.h" @@ -1218,3 +1220,55 @@ FPDFFormObj_RemoveObject(FPDF_PAGEOBJECT form_object, removed_object.release(); return true; } + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveOptionalContentGroups(FPDF_DOCUMENT document) { + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pDoc) { + return false; + } + + // Visibility under the default (View) configuration: an object is "hidden" + // when an OFF optional-content group (or OCMD / VE expression) suppresses it. + auto oc = pdfium::MakeRetain(pDoc, CPDF_OCContext::kView); + + const int page_count = FPDF_GetPageCount(document); + for (int i = 0; i < page_count; ++i) { + // FPDF_LoadPage parses the page content, so the object list is populated. + FPDF_PAGE page = FPDF_LoadPage(document, i); + if (!page) { + continue; + } + + CPDF_Page* pPage = CPDFPageFromFPDFPage(page); + if (IsPageObject(pPage)) { + // Collect hidden objects first, then remove, so the list is not mutated + // mid-iteration. + std::vector hidden; + const size_t count = pPage->GetPageObjectCount(); + for (size_t k = 0; k < count; ++k) { + CPDF_PageObject* obj = pPage->GetPageObjectByIndex(k); + if (obj && !oc->CheckPageObjectVisible(obj)) { + hidden.push_back(obj); + } + } + if (!hidden.empty()) { + for (CPDF_PageObject* obj : hidden) { + // Dropping the returned unique_ptr frees the removed object. + pPage->RemovePageObject(obj); + } + CPDF_PageContentGenerator generator(pPage); + generator.GenerateContent(); + } + } + + FPDF_ClosePage(page); + } + + // With the governed content removed, drop the optional-content machinery. + RetainPtr root = pDoc->GetMutableRoot(); + if (root) { + root->RemoveFor("OCProperties"); + } + return true; +} diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h index 2feee5303..a4dd9a8de 100644 --- a/public/fpdf_doc.h +++ b/public/fpdf_doc.h @@ -517,6 +517,53 @@ EPDF_GetMetaKeyName(FPDF_DOCUMENT document, void* buffer, unsigned long buflen); +// Experimental EmbedPDF Extension API. +// Remove the document's XMP metadata stream (the catalog /Metadata entry). +// +// document - handle to the document. +// +// XMP metadata (ISO 32000 §14.3.2) is stored separately from the Info +// dictionary, so clearing Info via EPDF_SetMetaText() does not remove it. This +// is the #1 redaction-sanitization miss: author/title/history can survive in +// XMP. Returns true on success, including when no /Metadata is present. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveXMPMetadata(FPDF_DOCUMENT document); + +// Experimental EmbedPDF Extension API. +// Remove every page's embedded thumbnail (the page /Thumb entry). +// +// document - handle to the document. +// +// An embedded thumbnail can retain a pre-redaction image of the page. Returns +// true on success, including when no thumbnails are present. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveEmbeddedThumbnails(FPDF_DOCUMENT document); + +// Experimental EmbedPDF Extension API. +// Remove all document-level JavaScript from |document|: the catalog +// /Names /JavaScript name tree, /OpenAction when it is a JavaScript action +// (GoTo destinations are preserved), and the catalog /AA additional-actions +// dictionary. +// +// document - handle to the document. +// +// Returns true on success, including when no JavaScript is present. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveAllJavaScript(FPDF_DOCUMENT document); + +// Experimental EmbedPDF Extension API. +// Remove content governed by hidden optional-content groups (OCGs / layers): +// for each page, drop page objects not visible under the default (View) +// configuration, regenerate the page content, then remove the catalog +// /OCProperties. This excises hidden-layer content rather than merely deleting +// /OCProperties (which would make that content visible). +// +// document - handle to the document. +// +// Returns true on success, including when no optional content is present. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDF_RemoveOptionalContentGroups(FPDF_DOCUMENT document); + // Experimental EmbedPDF Extension API. // Create a new destination array of the form [page /XYZ left top zoom]. //