Skip to content

Commit 688d3a3

Browse files
andiwand and claude committed
PDF stage 4.9: axial & radial shadings (types 2/3)
Render axial (type 2) and radial (type 3) shadings as SVG gradients, both via the `sh` operator and via `/PatternType 2` shading patterns selected by `scn`.

- `pdf_shading.{hpp,cpp}`: `parse_shading` resolves a `/Shading` dictionary, pre-sampling its tint `/Function` across `/Domain` into 32 sRGB colour stops (no function evaluator needed at render time). Types other than 2/3 and malformed shadings return null; `/Extend`, `/Background` and `/BBox` are parsed.
- Parser: `parse_resources` now builds the `/Shading` and `/Pattern` resource tables (after `/ColorSpace`, so named colour spaces resolve). A shading pattern resolves its `/Shading`; a tiling pattern is recognized (rendered in 4.10). `GraphicsState::Color` carries the selected `/Pattern` name.
- Extractor: `scn` records the pattern name; `paint_path` resolves a shading pattern to `PathElement::fill_shading` + the pattern `/Matrix`; the `sh` operator emits a `ShadingElement` flooding the current clip.
- HTML: a `GradientRegistry` emits `<linearGradient>`/`<radialGradient>` defs with `gradientUnits="userSpaceOnUse"`; a shading-pattern fill paints the path through `fill="url(#…)"`, and `sh` paints a clipped `<rect>`. `/Extend` is approximated by SVG's `pad` spread.

Tests: shading parsing (axial/radial, domain/extend, background, unsupported type, short coords, bad function) and extractor wiring (shading-pattern fill, unknown pattern, `sh` element, unknown shading).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 1299bea commit 688d3a3

14 files changed

Lines changed: 825 additions & 32 deletions

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ set(ODR_SOURCE_FILES
200200
"src/odr/internal/pdf/pdf_object.cpp"
201201
"src/odr/internal/pdf/pdf_object_parser.cpp"
202202
"src/odr/internal/pdf/pdf_page_extractor.cpp"
203+
"src/odr/internal/pdf/pdf_shading.cpp"
203204

204205
"src/odr/internal/font/cff_builder.cpp"
205206
"src/odr/internal/font/cff_font.cpp"

src/odr/internal/html/pdf_file.cpp

Lines changed: 138 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,18 @@ std::string device_color_to_css(const pdf::GraphicsState::Color &color) {
7979
return std::move(s).str();
8080
}
8181

82+
/// Convert an sRGB triple (a shading colour stop) to a CSS `rgb(r,g,b)`
/// string.
///
/// Each component is clamped to [0, 1], scaled by 255 and rounded to the
/// nearest integer (halves round away from zero). A NaN component is emitted
/// as 0 so the result is always a well-formed CSS colour.
///
/// @param rgb the red, green and blue components, nominally in [0, 1].
/// @return `rgb(R,G,B)` with each channel an integer in [0, 255].
std::string rgb_to_css(const std::array<double, 3> &rgb) {
  const auto to255 = [](const double v) {
    // `std::clamp` passes NaN through unchanged and `std::lround(NaN)` yields
    // an unspecified value, so NaN has to be handled before clamping.
    if (std::isnan(v)) {
      return 0;
    }
    return static_cast<int>(std::lround(std::clamp(v, 0.0, 1.0) * 255.0));
  };
  std::ostringstream s;
  s << "rgb(" << to255(rgb[0]) << ',' << to255(rgb[1]) << ',' << to255(rgb[2])
    << ')';
  return std::move(s).str();
}
93+
8294
/// Build an SVG `d` attribute from a path's subpaths, each point mapped through
8395
/// `to_box` (PDF user space -> the page box, y-down). Lines become `L`, cubic
8496
/// Béziers `C`, and an explicitly closed subpath ends with `Z`.
@@ -117,10 +129,12 @@ std::string svg_path_d(const std::vector<pdf::Subpath> &subpaths,
117129
/// stroke carries width (CTM-scaled in user space), caps, joins, miter limit
118130
/// and the dash pattern. A zero stroke width renders as a thin hairline.
119131
/// `clip_id`, when non-empty, references a `<clipPath>` installed via
120-
/// `clip-path`.
132+
/// `clip-path`. `gradient_id`, when non-empty, fills the path with that
133+
/// gradient (a shading pattern) instead of `fill_color`.
121134
std::string svg_path_fragment(const pdf::PathElement &path,
122135
const util::math::Transform2D &to_box,
123-
const std::string &clip_id) {
136+
const std::string &clip_id,
137+
const std::string &gradient_id) {
124138
if ((!path.fill && !path.stroke) || path.subpaths.empty()) {
125139
return {};
126140
}
@@ -131,7 +145,11 @@ std::string svg_path_fragment(const pdf::PathElement &path,
131145
}
132146

133147
if (path.fill) {
134-
f << " fill=\"" << device_color_to_css(path.fill_color) << '"';
148+
if (!gradient_id.empty()) {
149+
f << " fill=\"url(#" << gradient_id << ")\"";
150+
} else {
151+
f << " fill=\"" << device_color_to_css(path.fill_color) << '"';
152+
}
135153
if (path.even_odd) {
136154
f << " fill-rule=\"evenodd\"";
137155
}
@@ -253,6 +271,87 @@ class ClipRegistry {
253271
std::ostringstream m_defs;
254272
};
255273

274+
/// Registers a page's shadings (axial/radial) as `<linearGradient>`/
275+
/// `<radialGradient>` defs, deduplicating by shading and placement. The
276+
/// shading's pre-sampled colour stops become `<stop>`s; `gradientTransform`
277+
/// (shading space -> page box) places the gradient in the page's user space, so
278+
/// referencing elements use `gradientUnits="userSpaceOnUse"`. PDF `/Extend` is
279+
/// approximated by SVG's default `pad` spread (the end stops extend outward).
280+
/// Ids are namespaced per page (`g<page>_<n>`).
281+
class GradientRegistry {
282+
public:
283+
explicit GradientRegistry(std::uint32_t page) : m_page{page} {}
284+
285+
/// The gradient id to reference via `fill="url(#id)"` for `shading` placed by
286+
/// `m` (shading space -> page box). Empty for an unrepresentable shading.
287+
std::string register_gradient(const pdf::Shading &shading,
288+
const util::math::Transform2D &m) {
289+
if ((shading.type != 2 && shading.type != 3) || shading.stops.empty()) {
290+
return {};
291+
}
292+
std::ostringstream sig;
293+
sig << shading.type << ':' << static_cast<const void *>(&shading) << ':'
294+
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << m.e << ','
295+
<< m.f;
296+
const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str());
297+
if (!inserted) {
298+
return it->second;
299+
}
300+
it->second = "g" + std::to_string(m_page) + "_" + std::to_string(++m_count);
301+
const std::string &id = it->second;
302+
303+
const std::array<double, 6> &c = shading.coords;
304+
if (shading.type == 2) {
305+
m_defs << "<linearGradient id=\"" << id << "\" x1=\"" << c[0]
306+
<< "\" y1=\"" << c[1] << "\" x2=\"" << c[2] << "\" y2=\"" << c[3]
307+
<< '"';
308+
} else {
309+
// Radial: the outer circle (x1,y1,r1) is SVG's (cx,cy,r); the inner
310+
// circle (x0,y0,r0) is the focal point and radius (fr is SVG2).
311+
m_defs << "<radialGradient id=\"" << id << "\" cx=\"" << c[3]
312+
<< "\" cy=\"" << c[4] << "\" r=\"" << c[5] << "\" fx=\"" << c[0]
313+
<< "\" fy=\"" << c[1] << "\" fr=\"" << c[2] << '"';
314+
}
315+
m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\"matrix("
316+
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ','
317+
<< round2(m.e) << ',' << round2(m.f) << ")\">";
318+
for (const pdf::GradientStop &stop : shading.stops) {
319+
m_defs << "<stop offset=\"" << round2(stop.offset) << "\" stop-color=\""
320+
<< rgb_to_css(stop.rgb) << "\"/>";
321+
}
322+
m_defs << (shading.type == 2 ? "</linearGradient>" : "</radialGradient>");
323+
return id;
324+
}
325+
326+
[[nodiscard]] std::string defs() const { return m_defs.str(); }
327+
328+
private:
329+
std::uint32_t m_page;
330+
std::uint32_t m_count{0};
331+
std::unordered_map<std::string, std::string> m_id_by_signature;
332+
std::ostringstream m_defs;
333+
};
334+
335+
/// Serialize an `sh` shading flood to an SVG `<rect>` covering the page box,
336+
/// filled with `gradient_id` and bounded by `clip_id` (the clip in force at
337+
/// `sh` time). Returns "" when the shading produced no gradient. The rect spans
338+
/// the whole page; the clip (and the gradient's own extent) bound the paint.
339+
std::string svg_shading_fragment(const std::string &gradient_id,
340+
const std::string &clip_id, double width,
341+
double height) {
342+
if (gradient_id.empty()) {
343+
return {};
344+
}
345+
std::ostringstream f;
346+
f << "<rect x=\"0\" y=\"0\" width=\"" << round2(width) << "\" height=\""
347+
<< round2(height) << "\" fill=\"url(#" << gradient_id << ")\"";
348+
if (!clip_id.empty()) {
349+
f << " clip-path=\"url(#" << clip_id << ")\"";
350+
}
351+
f << "/>";
352+
return std::move(f).str();
353+
}
354+
256355
/// Deduplicates CSS declarations into atomic, single-property classes. PDF text
257356
/// emits one absolutely-positioned span per glyph run, and the same font sizes,
258357
/// offsets and spacings recur across the (potentially millions of) spans.
@@ -575,14 +674,41 @@ class HtmlServiceImpl final : public HtmlService {
575674
util::math::Transform2D::scaling_translation(1, -1, 0, height);
576675

577676
ClipRegistry clips(static_cast<std::uint32_t>(pages_out.size()));
677+
GradientRegistry gradients(static_cast<std::uint32_t>(pages_out.size()));
578678

579679
for (const pdf::PageElement &element :
580680
pdf::extract_page(stream, *page->resources, *m_logger)) {
581681
// A painted path: serialize its subpaths to an SVG `<path>` fragment in
582-
// the page viewBox (fill and/or stroke), under any active clip.
682+
// the page viewBox (fill and/or stroke), under any active clip. A
683+
// shading-pattern fill is painted through a gradient instead of a
684+
// colour.
583685
if (const auto *path = std::get_if<pdf::PathElement>(&element)) {
584686
const std::string clip_id = clips.register_clip(path->clip, to_box);
585-
std::string fragment = svg_path_fragment(*path, to_box, clip_id);
687+
std::string gradient_id;
688+
if (path->fill_shading != nullptr) {
689+
gradient_id = gradients.register_gradient(
690+
*path->fill_shading, path->shading_transform * to_box);
691+
}
692+
std::string fragment =
693+
svg_path_fragment(*path, to_box, clip_id, gradient_id);
694+
if (!fragment.empty()) {
695+
page_out.items.push_back(PathOut{std::move(fragment)});
696+
}
697+
continue;
698+
}
699+
700+
// An `sh` shading flood: a `<rect>` over the page box filled with the
701+
// shading's gradient, bounded by the clip in force at `sh` time.
702+
if (const auto *shading = std::get_if<pdf::ShadingElement>(&element)) {
703+
if (shading->shading == nullptr) {
704+
continue;
705+
}
706+
const std::string clip_id =
707+
clips.register_clip(shading->clip, to_box);
708+
const std::string gradient_id = gradients.register_gradient(
709+
*shading->shading, shading->transform * to_box);
710+
std::string fragment =
711+
svg_shading_fragment(gradient_id, clip_id, width, height);
586712
if (!fragment.empty()) {
587713
page_out.items.push_back(PathOut{std::move(fragment)});
588714
}
@@ -788,7 +914,8 @@ class HtmlServiceImpl final : public HtmlService {
788914
}
789915
}
790916

791-
page_out.clip_defs = clips.defs();
917+
// Clip-path and gradient defs share the page's hidden `<svg><defs>`.
918+
page_out.clip_defs = clips.defs() + gradients.defs();
792919
}
793920

794921
// Post-pass: every page has been scanned, so the per-font used-scalar sets
@@ -934,10 +1061,11 @@ class HtmlServiceImpl final : public HtmlService {
9341061
for (const PageOut &page : pages_out) {
9351062
out.write_element_begin("div",
9361063
HtmlElementOptions().set_class(page.classes));
937-
// Clip-path defs for this page, in a hidden zero-size `<svg>`. They are
938-
// referenced by id from the page's path fragments; `clipPathUnits`
939-
// defaults to `userSpaceOnUse`, so the geometry is read in the user space
940-
// of the referencing element (the page viewBox), not this `<svg>`.
1064+
// Clip-path and gradient defs for this page, in a hidden zero-size
1065+
// `<svg>`. They are referenced by id from the page's fragments;
1066+
// `clipPathUnits`/`gradientUnits` are `userSpaceOnUse`, so the geometry
1067+
// is read in the user space of the referencing element (the page
1068+
// viewBox), not this `<svg>`.
9411069
if (!page.clip_defs.empty()) {
9421070
out.write_raw(
9431071
"<svg width=\"0\" height=\"0\" style=\"position:absolute\">"

src/odr/internal/pdf/AGENTS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,15 @@ stage exists to avoid.
574574
current fill colour; `/SMask` and `/Mask` (stencil + colour-key) composited
575575
into RGBA on the raster path (a mask on a JPEG base is ignored — decoding the
576576
JPEG to composite is out of scope).
577+
- **Shadings & shading patterns** (axial type 2, radial type 3): `parse_shading`
578+
pre-samples the tint `/Function` across `/Domain` into 32 sRGB colour stops, so
579+
the renderer needs no function evaluator. The `sh` operator floods the current
580+
clip (a `ShadingElement` → `<rect>` filled with the gradient); a `/PatternType
581+
2` shading pattern selected by `scn` fills a path (`PathElement::fill_shading`
582+
+ the pattern `/Matrix`). Both emit SVG `<linearGradient>`/`<radialGradient>`
583+
with `gradientUnits="userSpaceOnUse"`; `/Extend` is approximated by SVG's `pad`
584+
spread. Mesh/function shadings (types 1, 4–7) and tiling patterns
585+
(`/PatternType 1`) are still future stages.
577586
- **SVG residue** — where no 1:1 primitive exists; all at generation time, never
578587
rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small
579588
flat polygons (pdf.js's approach); color spaces

src/odr/internal/pdf/pdf_document_element.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <odr/internal/pdf/pdf_cmap.hpp>
44
#include <odr/internal/pdf/pdf_encoding.hpp>
55
#include <odr/internal/pdf/pdf_object.hpp>
6+
#include <odr/internal/pdf/pdf_shading.hpp>
67
#include <odr/internal/util/math_util.hpp>
78

89
#include <array>
@@ -28,6 +29,7 @@ struct Annotation;
2829
struct Resources;
2930
struct Font;
3031
struct XObject;
32+
struct Pattern;
3133
struct ColorSpaceDef;
3234

3335
struct Element {
@@ -95,6 +97,13 @@ struct Resources final : Element {
9597
/// referenced by `BDC`. Each value is the resolved property-list dictionary
9698
/// `Object`; used to recover `/ActualText` for a `BDC /Tag /Name` sequence.
9799
std::unordered_map<std::string, Object> properties;
100+
/// The `/Shading` subdictionary (ISO 32000-1 8.7.4.3): named shadings painted
101+
/// by the `sh` operator. Resolved eagerly (the tint function sampled into
102+
/// colour stops) so extraction needs no parser handle.
103+
std::unordered_map<std::string, std::shared_ptr<Shading>> shading;
104+
/// The `/Pattern` subdictionary (ISO 32000-1 8.7.3.3): named tiling/shading
105+
/// patterns selected as a colour by `scn`/`SCN` in a `/Pattern` colour space.
106+
std::unordered_map<std::string, Pattern *> pattern;
98107
};
99108

100109
/// An external object referenced by `Do` and listed in a resource dictionary's
@@ -145,6 +154,27 @@ struct XObject final : Element {
145154
std::vector<double> stencil_decode; ///< `/Decode`, empty = default `[0 1]`
146155
};
147156

157+
/// A pattern listed in a resource dictionary's `/Pattern` subdictionary
158+
/// (ISO 32000-1 8.7.3), selected as a colour by `scn`/`SCN` in a `/Pattern`
159+
/// colour space. Shading patterns (`/PatternType 2`) paint a gradient through
160+
/// the path; tiling patterns (`/PatternType 1`) repeat a content-stream cell.
161+
struct Pattern final : Element {
162+
enum class Type {
163+
unknown,
164+
tiling, ///< `/PatternType 1`
165+
shading, ///< `/PatternType 2`
166+
};
167+
Type type{Type::unknown};
168+
169+
/// `/Matrix` mapping pattern space to the default coordinate system of the
170+
/// pattern's parent content stream (8.7.3.1); default identity.
171+
util::math::Transform2D matrix;
172+
173+
/// Shading pattern (`/PatternType 2`): the shading painted through the path,
174+
/// pre-resolved (its tint function sampled into stops). Null otherwise.
175+
std::shared_ptr<Shading> shading;
176+
};
177+
148178
/// A non-owning view over a string of PDF character codes, splitting it into
149179
/// fixed-width (`Font::code_byte_width()`) big-endian codes on iteration. Holds
150180
/// only a `string_view`, so it must not outlive the underlying bytes; iterate

src/odr/internal/pdf/pdf_document_parser.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,79 @@ XObject *parse_x_object(State &state, const ObjectReference &reference) {
759759
return x_object;
760760
}
761761

762+
/// A `ColorSpaceContext` over the parser, resolving a base/alternate space
763+
/// named by name against the (being-built) `/ColorSpace` table of `resources`.
764+
ColorSpaceContext make_color_space_context(DocumentParser &parser,
765+
const Resources *resources) {
766+
ColorSpaceContext context;
767+
context.resolve = [&parser](const Object &object) {
768+
return parser.resolve_object_copy(object);
769+
};
770+
context.load_stream = [&parser](const Object &object) {
771+
return object.is_reference()
772+
? parser.read_decoded_stream(object.as_reference())
773+
: std::string{};
774+
};
775+
context.named =
776+
[resources](const std::string &name) -> std::shared_ptr<ColorSpaceDef> {
777+
const auto it = resources->color_space.find(name);
778+
return it != resources->color_space.end() ? it->second : nullptr;
779+
};
780+
return context;
781+
}
782+
783+
/// Parse a `/Shading` dictionary into a resolved `Shading` (its tint function
784+
/// sampled into colour stops). `resources` supplies named colour spaces.
785+
std::shared_ptr<Shading> parse_shading_resource(State &state,
786+
const Object &object,
787+
const Resources *resources) {
788+
DocumentParser &parser = state.parser();
789+
ShadingContext context;
790+
context.resolve = [&parser](const Object &o) {
791+
return parser.resolve_object_copy(o);
792+
};
793+
context.load_stream = [&parser](const Object &o) {
794+
return o.is_reference() ? parser.read_decoded_stream(o.as_reference())
795+
: std::string{};
796+
};
797+
return parse_shading(object, context,
798+
make_color_space_context(parser, resources));
799+
}
800+
801+
/// Parse a `/Pattern` entry. A shading pattern (`/PatternType 2`) resolves its
802+
/// `/Shading`; a tiling pattern (`/PatternType 1`) is recognized here and its
803+
/// content rendered in a later stage. `/Matrix` is taken either way.
804+
Pattern *parse_pattern(State &state, const ObjectReference &reference,
805+
const Resources *resources) {
806+
DocumentParser &parser = state.parser();
807+
Document &document = state.document();
808+
809+
auto *pattern = document.create_element<Pattern>();
810+
IndirectObject object = parser.read_object(reference);
811+
if (!object.object.is_dictionary()) {
812+
return pattern;
813+
}
814+
const Dictionary &dictionary = object.object.as_dictionary();
815+
pattern->object_reference = reference;
816+
pattern->object = Object(dictionary);
817+
818+
if (dictionary.has_value("Matrix")) {
819+
pattern->matrix = parse_matrix(parser, dictionary["Matrix"]);
820+
}
821+
const auto pattern_type = static_cast<std::int32_t>(
822+
parser.resolve_object_copy(dictionary.get("PatternType"))
823+
.as_integer_opt()
824+
.value_or(0));
825+
if (pattern_type == 2) {
826+
pattern->type = Pattern::Type::shading;
827+
pattern->shading =
828+
parse_shading_resource(state, dictionary.get("Shading"), resources);
829+
} else if (pattern_type == 1) {
830+
pattern->type = Pattern::Type::tiling;
831+
}
832+
return pattern;
833+
}
834+
762835
Resources *parse_resources(State &state, const Object &object) {
763836
DocumentParser &parser = state.parser();
764837
Document &document = state.document();
@@ -820,6 +893,29 @@ Resources *parse_resources(State &state, const Object &object) {
820893
}
821894
}
822895

896+
// Shadings and patterns are parsed after `/ColorSpace` so a named colour
897+
// space they reference is already in `resources->color_space`.
898+
if (dictionary.has_value("Shading")) {
899+
const Dictionary shading_table =
900+
parser.resolve_object_copy(dictionary["Shading"]).as_dictionary();
901+
for (const auto &[key, value] : shading_table) {
902+
if (auto shading = parse_shading_resource(state, value, resources)) {
903+
resources->shading[key] = std::move(shading);
904+
}
905+
}
906+
}
907+
908+
if (dictionary.has_value("Pattern")) {
909+
const Dictionary pattern_table =
910+
parser.resolve_object_copy(dictionary["Pattern"]).as_dictionary();
911+
for (const auto &[key, value] : pattern_table) {
912+
if (value.is_reference()) {
913+
resources->pattern[key] =
914+
parse_pattern(state, value.as_reference(), resources);
915+
}
916+
}
917+
}
918+
823919
if (dictionary.has_key("Properties") && !dictionary["Properties"].is_null()) {
824920
// Named property lists for `BDC`; resolved eagerly so text extraction can
825921
// recover `/ActualText` without a parser handle (cf. form XObjects).

0 commit comments

Comments
 (0)