Skip to content

Commit 78cdc3b

Browse files
andiwandclaude
andcommitted
PDF tiling patterns: colour-through-base fix, BBox clip, registry dedup
Follow-ups on the stage-4.10 tiling-pattern work (#574): - Fix uncoloured-pattern colour resolution: `ColorSpaceDef::to_rgb` now defers `ColorSpaceKind::pattern` to its underlying base, so a `/PaintType 2` pattern selected through a named Pattern space (`[/Pattern /DeviceRGB]`) paints in its tint instead of black. Adds a regression test. - Clip each tiling-pattern cell to its `/BBox` so marks outside the cell, or in the gap when a step exceeds the BBox, don't leak into the tile. - Deduplicate the per-page `<defs>` registries (clip/gradient/pattern) behind a shared `DefsRegistry` base, and extract an `svg_matrix` helper for the repeated `matrix(...)` serialization. - Use `has_value` for the pattern `/Resources` lookup, matching `/BBox`. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01Negfa31h7p7vRYJzGUhjdE
1 parent 500327d commit 78cdc3b

5 files changed

Lines changed: 134 additions & 65 deletions

File tree

src/odr/internal/html/pdf_file.cpp

Lines changed: 89 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ namespace {
4141
/// the extra digits add up across a page full of path data.
4242
double round2(const double v) { return std::round(v * 100.0) / 100.0; }
4343

44+
/// Serialize a transform as an SVG `matrix(...)`. Only the translation (e, f)
45+
/// is rounded — it lives in page-box units where 1/100 px is plenty; the linear
46+
/// part (a..d) keeps full precision so small scale/skew factors aren't
47+
/// quantized to zero. Used for `transform`, `gradientTransform` and
48+
/// `patternTransform`.
49+
std::string svg_matrix(const util::math::Transform2D &m) {
50+
std::ostringstream f;
51+
f << "matrix(" << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ','
52+
<< round2(m.e) << ',' << round2(m.f) << ')';
53+
return std::move(f).str();
54+
}
55+
4456
/// Convert a PDF device color to a CSS `rgb(...)` string. Non-device color
4557
/// spaces (Separation/ICCBased/… — stage 4.4) and the unknown space fall back
4658
/// to black, the PDF initial color.
@@ -222,25 +234,60 @@ std::string svg_image_fragment(const pdf::ImageElement &image,
222234
if (!clip_id.empty()) {
223235
f << "<g clip-path=\"url(#" << clip_id << ")\">";
224236
}
225-
f << R"(<image width="1" height="1" preserveAspectRatio="none" transform="matrix()"
226-
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << round2(m.e)
227-
<< ',' << round2(m.f) << ")\"";
237+
f << R"(<image width="1" height="1" preserveAspectRatio="none" transform=")"
238+
<< svg_matrix(m) << '"';
228239
f << " href=\"" << file_to_url(image.data, image.mime) << "\"/>";
229240
if (!clip_id.empty()) {
230241
f << "</g>";
231242
}
232243
return std::move(f).str();
233244
}
234245

246+
/// Shared bookkeeping for the per-page `<defs>` registries below (clips,
247+
/// gradients, tiling patterns): a signature->id cache that deduplicates
248+
/// repeated definitions, a per-page monotonic id counter, and the accumulated
249+
/// `<defs>` markup (emitted once into the page's hidden `<svg>`). Ids are
250+
/// namespaced per page as `<prefix><page>_<n>`.
251+
class DefsRegistry {
252+
public:
253+
explicit DefsRegistry(const std::uint32_t page) : m_page{page} {}
254+
255+
[[nodiscard]] std::string defs() const { return m_defs.str(); }
256+
257+
protected:
258+
/// The id for `signature`, minting `<prefix><page>_<n>` the first time it is
259+
/// seen. `inserted` is true only on that first sight — when the caller still
260+
/// needs to emit the definition into `m_defs`.
261+
struct Entry {
262+
std::string id;
263+
bool inserted;
264+
};
265+
Entry intern(const std::string &signature, const char *prefix) {
266+
const auto [it, inserted] = m_id_by_signature.try_emplace(signature);
267+
if (inserted) {
268+
it->second = std::string(prefix) + std::to_string(m_page) + "_" +
269+
std::to_string(++m_count);
270+
}
271+
return {it->second, inserted};
272+
}
273+
274+
std::ostringstream m_defs;
275+
276+
private:
277+
std::uint32_t m_page;
278+
std::uint32_t m_count{0};
279+
std::unordered_map<std::string, std::string> m_id_by_signature;
280+
};
281+
235282
/// Registers a page's clip regions as nested `<clipPath>` defs, deduplicating
236283
/// shared prefixes. PDF's current clip is the *intersection* of an ordered list
237284
/// of regions; SVG expresses intersection by chaining `clip-path` from one
238285
/// `<clipPath>` to the next, so region i's clipPath references region i-1's and
239286
/// the painted element references the last. Ids are namespaced per page
240-
/// (`c<page>_<n>`); `defs()` is emitted once in a hidden `<svg>` for the page.
241-
class ClipRegistry {
287+
/// (`c<page>_<n>`).
288+
class ClipRegistry : public DefsRegistry {
242289
public:
243-
explicit ClipRegistry(std::uint32_t page) : m_page{page} {}
290+
using DefsRegistry::DefsRegistry;
244291

245292
/// The clipPath id to reference on a path painted under `clip`, registering
246293
/// any not-yet-seen regions. Empty when `clip` is empty (unclipped).
@@ -253,11 +300,9 @@ class ClipRegistry {
253300
signature += region.even_odd ? 'E' : 'N';
254301
signature += d;
255302
signature += ';';
256-
const auto [it, inserted] = m_id_by_signature.try_emplace(signature);
303+
const auto [id, inserted] = intern(signature, "c");
257304
if (inserted) {
258-
it->second =
259-
"c" + std::to_string(m_page) + "_" + std::to_string(++m_count);
260-
m_defs << "<clipPath id=\"" << it->second << '"';
305+
m_defs << "<clipPath id=\"" << id << '"';
261306
if (!parent.empty()) {
262307
m_defs << " clip-path=\"url(#" << parent << ")\"";
263308
}
@@ -267,18 +312,10 @@ class ClipRegistry {
267312
}
268313
m_defs << "/></clipPath>";
269314
}
270-
parent = it->second;
315+
parent = id;
271316
}
272317
return parent;
273318
}
274-
275-
[[nodiscard]] std::string defs() const { return m_defs.str(); }
276-
277-
private:
278-
std::uint32_t m_page;
279-
std::uint32_t m_count{0};
280-
std::unordered_map<std::string, std::string> m_id_by_signature;
281-
std::ostringstream m_defs;
282319
};
283320

284321
/// Registers a page's shadings (axial/radial) as `<linearGradient>`/
@@ -293,9 +330,9 @@ class ClipRegistry {
293330
/// shading is over-painted beyond its interval instead of being masked to it;
294331
/// `Shading::background` and `Shading::bbox` are likewise not yet honoured.
295332
/// Honouring them needs the fill clipped to the gradient band/annulus.
296-
class GradientRegistry {
333+
class GradientRegistry : public DefsRegistry {
297334
public:
298-
explicit GradientRegistry(const std::uint32_t page) : m_page{page} {}
335+
using DefsRegistry::DefsRegistry;
299336

300337
/// The gradient id to reference via `fill="url(#id)"` for `shading` placed by
301338
/// `m` (shading space -> page box). Empty for an unrepresentable shading.
@@ -308,12 +345,10 @@ class GradientRegistry {
308345
sig << shading.type << ':' << static_cast<const void *>(&shading) << ':'
309346
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << m.e << ','
310347
<< m.f;
311-
const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str());
348+
const auto [id, inserted] = intern(sig.str(), "g");
312349
if (!inserted) {
313-
return it->second;
350+
return id;
314351
}
315-
it->second = "g" + std::to_string(m_page) + "_" + std::to_string(++m_count);
316-
const std::string &id = it->second;
317352

318353
const std::array<double, 6> &c = shading.coords;
319354
if (shading.type == 2) {
@@ -327,27 +362,15 @@ class GradientRegistry {
327362
<< "\" cy=\"" << c[4] << "\" r=\"" << c[5] << "\" fx=\"" << c[0]
328363
<< "\" fy=\"" << c[1] << "\" fr=\"" << c[2] << '"';
329364
}
330-
// Only the translation (e, f) is rounded — it lives in page-box units where
331-
// 1/100 px is plenty; the linear part (a..d) keeps full precision so small
332-
// scale/skew factors aren't quantized to zero.
333-
m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\"matrix("
334-
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ','
335-
<< round2(m.e) << ',' << round2(m.f) << ")\">";
365+
m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\""
366+
<< svg_matrix(m) << "\">";
336367
for (const pdf::GradientStop &stop : shading.stops) {
337368
m_defs << "<stop offset=\"" << round2(stop.offset) << "\" stop-color=\""
338369
<< rgb_to_css(stop.rgb) << "\"/>";
339370
}
340371
m_defs << (shading.type == 2 ? "</linearGradient>" : "</radialGradient>");
341372
return id;
342373
}
343-
344-
[[nodiscard]] std::string defs() const { return m_defs.str(); }
345-
346-
private:
347-
std::uint32_t m_page{};
348-
std::uint32_t m_count{0};
349-
std::unordered_map<std::string, std::string> m_id_by_signature;
350-
std::ostringstream m_defs;
351374
};
352375

353376
/// Serialize an `sh` shading flood to an SVG `<rect>` covering the page box,
@@ -376,12 +399,14 @@ std::string svg_shading_fragment(const std::string &gradient_id,
376399
/// every `/XStep`/`/YStep`, and `patternTransform` (pattern space -> page box)
377400
/// places the lattice. An uncoloured pattern (`/PaintType 2`) ignores its
378401
/// content's own colours and paints in the path's fill colour, so the cache key
379-
/// folds that colour in. Ids are namespaced per page (`pat<page>_<n>`). Only
380-
/// paths and images inside the tile are rendered (nested text/shadings/patterns
381-
/// are skipped — rare). Returns "" for an unrepresentable pattern.
382-
class PatternRegistry {
402+
/// folds that colour in. Each cell is clipped to its `/BBox` so marks outside
403+
/// the cell (or in the gap when a step exceeds the BBox) don't leak into the
404+
/// tile. Ids are namespaced per page (`pat<page>_<n>`). Only paths and images
405+
/// inside the tile are rendered (nested text/shadings/patterns are skipped —
406+
/// rare). Returns "" for an unrepresentable pattern.
407+
class PatternRegistry : public DefsRegistry {
383408
public:
384-
explicit PatternRegistry(const std::uint32_t page) : m_page{page} {}
409+
using DefsRegistry::DefsRegistry;
385410

386411
std::string register_pattern(const pdf::Pattern &pattern,
387412
const util::math::Transform2D &m,
@@ -398,12 +423,10 @@ class PatternRegistry {
398423
if (uncoloured) {
399424
sig << ':' << device_color_to_css(fill_color);
400425
}
401-
const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str());
426+
const auto [id, inserted] = intern(sig.str(), "pat");
402427
if (!inserted) {
403-
return it->second;
428+
return id;
404429
}
405-
it->second =
406-
"pat" + std::to_string(m_page) + "_" + std::to_string(++m_count);
407430

408431
// Tile content is laid out in pattern space (identity page transform); the
409432
// y-flip and placement live in `patternTransform`.
@@ -423,24 +446,29 @@ class PatternRegistry {
423446
}
424447
}
425448

426-
m_defs << "<pattern id=\"" << it->second
449+
m_defs << "<pattern id=\"" << id
427450
<< "\" patternUnits=\"userSpaceOnUse\" x=\""
428451
<< round2(pattern.bbox[0]) << "\" y=\"" << round2(pattern.bbox[1])
429452
<< "\" width=\"" << round2(std::abs(pattern.x_step))
430453
<< "\" height=\"" << round2(std::abs(pattern.y_step))
431-
<< "\" patternTransform=\"matrix(" << m.a << ',' << m.b << ',' << m.c
432-
<< ',' << m.d << ',' << round2(m.e) << ',' << round2(m.f) << ")\">"
433-
<< std::move(tile).str() << "</pattern>";
434-
return it->second;
454+
<< "\" patternTransform=\"" << svg_matrix(m) << "\">";
455+
// Clip each cell to its `/BBox` (ISO 32000-1 8.7.3.1). An overlapping
456+
// lattice (a step smaller than the BBox) can't be expressed as a single SVG
457+
// `<pattern>` and is not reproduced.
458+
const double bbox_w = pattern.bbox[2] - pattern.bbox[0];
459+
const double bbox_h = pattern.bbox[3] - pattern.bbox[1];
460+
if (bbox_w > 0 && bbox_h > 0) {
461+
m_defs << "<clipPath id=\"" << id << "c\"><rect x=\""
462+
<< round2(pattern.bbox[0]) << "\" y=\"" << round2(pattern.bbox[1])
463+
<< "\" width=\"" << round2(bbox_w) << "\" height=\""
464+
<< round2(bbox_h) << "\"/></clipPath><g clip-path=\"url(#" << id
465+
<< "c)\">" << std::move(tile).str() << "</g>";
466+
} else {
467+
m_defs << std::move(tile).str();
468+
}
469+
m_defs << "</pattern>";
470+
return id;
435471
}
436-
437-
[[nodiscard]] std::string defs() const { return m_defs.str(); }
438-
439-
private:
440-
std::uint32_t m_page{};
441-
std::uint32_t m_count{0};
442-
std::unordered_map<std::string, std::string> m_id_by_signature;
443-
std::ostringstream m_defs;
444472
};
445473

446474
/// Deduplicates CSS declarations into atomic, single-property classes. PDF text

src/odr/internal/pdf/AGENTS.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -590,9 +590,12 @@ stage exists to avoid.
590590
every `/XStep`/`/YStep`, with `patternTransform` (the pattern `/Matrix`)
591591
placing the lattice; a `/PatternType 1` fill references it as `fill="url(#…)"`.
592592
Coloured (`/PaintType 1`) cells carry their own colours; uncoloured
593-
(`/PaintType 2`) cells are painted in the current fill colour. Only paths and
594-
images inside a tile are rendered (nested text/shadings/patterns are skipped —
595-
rare).
593+
(`/PaintType 2`) cells are painted in the current fill colour (resolved
594+
through the Pattern colour space's base, so `[/Pattern /DeviceRGB]` keeps its
595+
tint). Each cell is clipped to its `/BBox`; an overlapping lattice (a step
596+
smaller than the BBox) can't be expressed as one SVG `<pattern>` and is not
597+
reproduced. Only paths and images inside a tile are rendered (nested
598+
text/shadings/patterns are skipped — rare).
596599
- **SVG residue** — where no 1:1 primitive exists; all at generation time, never
597600
rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small
598601
flat polygons (pdf.js's approach); color spaces

src/odr/internal/pdf/pdf_color.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,13 @@ ColorSpaceDef::to_rgb(const std::vector<double> &c) const {
148148
return alternate->to_rgb(tint->eval(c));
149149
}
150150
case ColorSpaceKind::pattern:
151+
// An uncoloured pattern (`/PaintType 2`) carries its colour in the Pattern
152+
// space's underlying base (e.g. `[/Pattern /DeviceRGB]`); convert through
153+
// it. Without a base there is no device colour to convert.
154+
if (base != nullptr) {
155+
return base->to_rgb(c);
156+
}
157+
return {0, 0, 0};
151158
case ColorSpaceKind::unknown:
152159
return {0, 0, 0};
153160
}

src/odr/internal/pdf/pdf_document_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ Pattern *parse_pattern(State &state, const ObjectReference &reference,
860860
if (object.has_stream) {
861861
pattern->content = parser.read_decoded_stream(object);
862862
}
863-
if (dictionary.has_key("Resources")) {
863+
if (dictionary.has_value("Resources")) {
864864
pattern->resources = parse_resources(state, dictionary["Resources"]);
865865
}
866866
}

test/src/internal/pdf/pdf_page_extractor.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,37 @@ TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_carries_colour) {
961961
EXPECT_EQ(p.fill_pattern->paint_type, 2);
962962
}
963963

964+
// An uncoloured pattern selected through a *named* Pattern colour space with an
965+
// underlying base (`[/Pattern /DeviceRGB]`) resolves its leading components
966+
// through that base — `1 0 0` is red, not black (the base would be ignored if
967+
// the Pattern space's `to_rgb` dropped it).
968+
TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_colour_through_base) {
969+
Pattern pattern;
970+
pattern.type = Pattern::Type::tiling;
971+
pattern.paint_type = 2;
972+
973+
std::vector<Object> array{Object(Name{"Pattern"}), Object(Name{"DeviceRGB"})};
974+
ColorSpaceContext ctx;
975+
ctx.resolve = [](const Object &o) { return o; };
976+
ctx.load_stream = [](const Object &) { return std::string{}; };
977+
ctx.named = nullptr;
978+
979+
Resources res;
980+
res.color_space["CS1"] =
981+
parse_color_space(Object(Array(std::move(array))), ctx);
982+
res.pattern["P2"] = &pattern;
983+
984+
const auto page =
985+
extract_page("/CS1 cs 1 0 0 /P2 scn 0 0 10 10 re f", res, Logger::null());
986+
ASSERT_EQ(page.size(), 1);
987+
const PathElement &p = std::get<PathElement>(page[0]);
988+
ASSERT_NE(p.fill_pattern, nullptr);
989+
EXPECT_EQ(p.fill_color.space, ColorSpace::device_rgb);
990+
EXPECT_DOUBLE_EQ(p.fill_color.rgb[0], 1.0);
991+
EXPECT_DOUBLE_EQ(p.fill_color.rgb[1], 0.0);
992+
EXPECT_DOUBLE_EQ(p.fill_color.rgb[2], 0.0);
993+
}
994+
964995
// The `sh` operator floods the current clip with a named `/Shading`, emitting a
965996
// `ShadingElement` placed by the CTM.
966997
TEST(PdfPageExtractor, sh_emits_shading_element) {

0 commit comments

Comments
 (0)