@@ -525,12 +525,110 @@ Element *parse_page_or_pages(State &state, const ObjectReference &reference,
525525// back the in-progress element, so the in-memory graph mirrors the file.
526526Resources *parse_resources (State &state, const Object &object);
527527
528+ // / Read an integer image-dictionary entry (e.g. `/Width`), resolving an
529+ // / indirect reference, defaulting to `fallback`.
530+ std::int32_t image_int (DocumentParser &parser, const Dictionary &dictionary,
531+ const std::string &key, std::int32_t fallback) {
532+ return static_cast <std::int32_t >(
533+ parser.resolve_object_copy (dictionary.get (key))
534+ .as_integer_opt ()
535+ .value_or (fallback));
536+ }
537+
538+ // / The `/Decode` array of an image dictionary as doubles ([] when absent).
539+ std::vector<double > image_decode (DocumentParser &parser,
540+ const Dictionary &dictionary) {
541+ std::vector<double > decode;
542+ const Object decode_object =
543+ parser.resolve_object_copy (dictionary.get (" Decode" ));
544+ if (decode_object.is_array ()) {
545+ for (const Object &item : decode_object.as_array ()) {
546+ decode.push_back (item.as_real ());
547+ }
548+ }
549+ return decode;
550+ }
551+
552+ // / Resolve a `/SMask` (soft mask) or stencil `/Mask` sub-image referenced by
553+ // / `mask` into a base-sized alpha plane (ISO 32000-1 11.6.5.2 / 8.9.6.3). The
554+ // / sub-image is a single-component raster: decode its `/Filter` chain, then map
555+ // / its samples to coverage (`decode_mask_alpha`). Returns empty when `mask` is
556+ // / not a stream reference or its codec is not decodable (CCITT/JBIG2/JPX), so
557+ // / the base image stays opaque.
558+ std::vector<std::uint8_t > resolve_mask_alpha (DocumentParser &parser,
559+ const Object &mask,
560+ std::int32_t base_width,
561+ std::int32_t base_height,
562+ bool stencil) {
563+ if (!mask.is_reference ()) {
564+ return {};
565+ }
566+ const IndirectObject &object = parser.read_object (mask.as_reference ());
567+ if (!object.object .is_dictionary ()) {
568+ return {};
569+ }
570+ const Dictionary &dictionary = object.object .as_dictionary ();
571+ Object filter;
572+ if (dictionary.has_key (" Filter" )) {
573+ filter = parser.deep_resolve_object_copy (dictionary[" Filter" ]);
574+ }
575+ Object decode_parms;
576+ if (dictionary.has_key (" DecodeParms" )) {
577+ decode_parms = parser.deep_resolve_object_copy (dictionary[" DecodeParms" ]);
578+ }
579+ DecodeResult result =
580+ decode (filter, decode_parms, parser.read_object_stream (object));
581+ if (result.stopped_at_filter .has_value ()) {
582+ return {}; // an image codec we cannot decode (CCITT/JBIG2/JPX)
583+ }
584+ return decode_mask_alpha (
585+ result.data , image_int (parser, dictionary, " Width" , 0 ),
586+ image_int (parser, dictionary, " Height" , 0 ),
587+ image_int (parser, dictionary, " BitsPerComponent" , stencil ? 1 : 8 ),
588+ image_decode (parser, dictionary), stencil, base_width, base_height);
589+ }
590+
591+ // / Carry an `/ImageMask true` stencil's decoded bitmap and geometry onto
592+ // / `x_object` (ISO 32000-1 8.9.6.2). The stencil is painted in the current fill
593+ // / colour, known only at `Do` time, so the page extractor recolours it; here we
594+ // / only decode and stash. An undecodable codec leaves `stencil_mask` false so
595+ // / `Do` skips it.
596+ void parse_stencil_mask (DocumentParser &parser, const Dictionary &dictionary,
597+ const IndirectObject &object, XObject &x_object) {
598+ Object filter;
599+ if (dictionary.has_key (" Filter" )) {
600+ filter = parser.deep_resolve_object_copy (dictionary[" Filter" ]);
601+ }
602+ Object decode_parms;
603+ if (dictionary.has_key (" DecodeParms" )) {
604+ decode_parms = parser.deep_resolve_object_copy (dictionary[" DecodeParms" ]);
605+ }
606+ DecodeResult result =
607+ decode (filter, decode_parms, parser.read_object_stream (object));
608+ if (result.stopped_at_filter .has_value ()) {
609+ return ; // CCITT/JBIG2 fax stencils are not yet decodable
610+ }
611+ const std::int32_t width = image_int (parser, dictionary, " Width" , 0 );
612+ const std::int32_t height = image_int (parser, dictionary, " Height" , 0 );
613+ if (width <= 0 || height <= 0 ) {
614+ return ;
615+ }
616+ x_object.stencil_mask = true ;
617+ x_object.stencil_samples = std::move (result.data );
618+ x_object.stencil_width = width;
619+ x_object.stencil_height = height;
620+ x_object.stencil_decode = image_decode (parser, dictionary);
621+ }
622+
528623// / Build the browser-ready bytes of an image XObject (ISO 32000-1 8.9). A JPEG
529624// / (`DCTDecode`) passes through undecoded; a fully decodable raster
530625// / (Flate/LZW/RunLength/ASCII/raw) is decoded, its samples assembled through
531- // / the image's colour space and re-encoded as an 8-bit RGB PNG. Codecs we
626+ // / the image's colour space and re-encoded as a PNG — RGBA when a `/SMask`,
627+ // / stencil `/Mask` or colour-key `/Mask` supplies transparency. Codecs we
532628// / cannot yet hand off (JPXDecode, CCITTFaxDecode, JBIG2Decode) and unresolved
533- // / colour spaces leave the bytes empty, so `Do` skips the image.
629+ // / colour spaces leave the bytes empty, so `Do` skips the image. A `/SMask` or
630+ // / `/Mask` on a JPEG base is ignored (decoding the JPEG to composite is out of
631+ // / scope).
534632void parse_image_data (DocumentParser &parser, const Dictionary &dictionary,
535633 const IndirectObject &object, XObject &x_object) {
536634 Object filter;
@@ -558,31 +656,38 @@ void parse_image_data(DocumentParser &parser, const Dictionary &dictionary,
558656 };
559657 color_space = parse_color_space (dictionary.get (" ColorSpace" ), context);
560658 }
561- const auto width = static_cast <std::int32_t >(
562- parser.resolve_object_copy (dictionary.get (" Width" ))
563- .as_integer_opt ()
564- .value_or (0 ));
565- const auto height = static_cast <std::int32_t >(
566- parser.resolve_object_copy (dictionary.get (" Height" ))
567- .as_integer_opt ()
568- .value_or (0 ));
569- const auto bits_per_component = static_cast <std::int32_t >(
570- parser.resolve_object_copy (dictionary.get (" BitsPerComponent" ))
571- .as_integer_opt ()
572- .value_or (8 ));
573-
574- std::vector<double > decode_array;
575- const Object decode_object =
576- parser.resolve_object_copy (dictionary.get (" Decode" ));
577- if (decode_object.is_array ()) {
578- for (const Object &item : decode_object.as_array ()) {
579- decode_array.push_back (item.as_real ());
659+ const std::int32_t width = image_int (parser, dictionary, " Width" , 0 );
660+ const std::int32_t height = image_int (parser, dictionary, " Height" , 0 );
661+ const std::int32_t bits_per_component =
662+ image_int (parser, dictionary, " BitsPerComponent" , 8 );
663+ const std::vector<double > decode_array = image_decode (parser, dictionary);
664+
665+ // Transparency (8.9.6 / 11.6.5.2): a `/SMask` (alpha) takes precedence over a
666+ // `/Mask`, which is either a stencil sub-image (a reference) or a colour-key
667+ // array. Each resolves to a base-sized alpha plane or a colour-key range,
668+ // which `encode_image` composites into an RGBA PNG on the raster path.
669+ std::vector<std::uint8_t > alpha;
670+ std::vector<double > color_key;
671+ if (dictionary.has_value (" SMask" )) {
672+ alpha = resolve_mask_alpha (parser, dictionary[" SMask" ], width, height,
673+ /* stencil=*/ false );
674+ }
675+ if (alpha.empty () && dictionary.has_value (" Mask" )) {
676+ const Object mask = parser.resolve_object_copy (dictionary[" Mask" ]);
677+ if (mask.is_array ()) {
678+ for (const Object &item : mask.as_array ()) {
679+ color_key.push_back (item.as_real ());
680+ }
681+ } else if (dictionary[" Mask" ].is_reference ()) {
682+ alpha = resolve_mask_alpha (parser, dictionary[" Mask" ], width, height,
683+ /* stencil=*/ true );
580684 }
581685 }
582686
583- if (std::optional<EncodedImage> encoded = encode_image (
584- parser.read_object_stream (object), filter, decode_parms, width,
585- height, bits_per_component, color_space.get (), decode_array)) {
687+ if (std::optional<EncodedImage> encoded =
688+ encode_image (parser.read_object_stream (object), filter, decode_parms,
689+ width, height, bits_per_component, color_space.get (),
690+ decode_array, alpha, color_key)) {
586691 x_object.image_data = std::move (encoded->data );
587692 x_object.image_mime = std::move (encoded->mime );
588693 }
@@ -614,12 +719,15 @@ XObject *parse_x_object(State &state, const ObjectReference &reference) {
614719 : " " ;
615720 if (subtype == " Image" ) {
616721 x_object->subtype = XObject::Subtype::image;
617- // `/ImageMask` stencils and colour-key masks are a later stage (4.8); leave
618- // their bytes empty so `Do` skips them. Everything else is handed to the
619- // browser as JPEG (pass-through) or PNG (raster), or skipped.
722+ // An `/ImageMask true` stencil is painted in the current fill colour (known
723+ // only at `Do` time), so its bitmap is stashed for the page extractor to
724+ // recolour; everything else is encoded to JPEG/PNG bytes here (with any
725+ // `/SMask`/`/Mask` transparency), or skipped.
620726 const bool image_mask =
621727 dictionary.get (" ImageMask" ).as_bool_opt ().value_or (false );
622- if (!image_mask) {
728+ if (image_mask) {
729+ parse_stencil_mask (parser, dictionary, object, *x_object);
730+ } else {
623731 parse_image_data (parser, dictionary, object, *x_object);
624732 }
625733 return x_object;
0 commit comments