Skip to content

Commit 7d02cd8

Browse files
andiwand and claude committed
PDF: text colour, strongly-typed render mode, image clip fix
- Carry the non-stroking (fill) and stroking device colours on each TextElement and paint runs in their colour (interned `.k` class) when it is not the default black — fill modes use the fill colour, the stroke-only modes (Tr 1/5) the stroking colour. The colour rides the visible layer in the dual-layer (PUA glyph) cases.
- Replace the bare `int rendering_mode` with a strongly-typed `TextRenderingMode` enum (ISO 32000-1 Table 106) across the graphics state, page element and renderer, removing the magic 0/1/3/5/7 comparisons.
- Wrap clipped `<image>` elements in a transform-free `<g clip-path>`: a clip-path on the image itself resolves in the image's post-transform unit-square space and clips the whole image away.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent eb8b45d commit 7d02cd8

6 files changed

Lines changed: 89 additions & 31 deletions

File tree

src/odr/internal/html/pdf_file.cpp

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,12 @@ std::string svg_path_fragment(const pdf::PathElement &path,
181181
/// or "" when it carries no pass-through bytes. The image fills the unit square
182182
/// in user space (ISO 32000-1 8.10.5); the transform maps that square — through
183183
/// a vertical flip (the image's first row is its top, SVG draws y-down) and the
184-
/// CTM — into the page box. `clip_id` installs a clip via `clip-path`.
184+
/// CTM — into the page box. `clip_id`, when non-empty, installs a clip via a
185+
/// wrapping `<g clip-path>`. The clip geometry is in the page viewBox
186+
/// (`userSpaceOnUse`), but the `<image>` carries its own `transform`, so a
187+
/// `clip-path` placed *on the image* would be resolved in the image's
188+
/// post-transform unit-square space and clip the whole image away. The `<g>`
189+
/// carries no transform, so the clip is read in the viewBox where it lives.
185190
std::string svg_image_fragment(const pdf::ImageElement &image,
186191
const util::math::Transform2D &to_box,
187192
const std::string &clip_id) {
@@ -194,13 +199,18 @@ std::string svg_image_fragment(const pdf::ImageElement &image,
194199
const util::math::Transform2D m = flip * image.transform * to_box;
195200

196201
std::ostringstream f;
202+
// The clip wraps the image in a transform-free `<g>` rather than sitting on
203+
// the `<image>`: see the function comment.
204+
if (!clip_id.empty()) {
205+
f << "<g clip-path=\"url(#" << clip_id << ")\">";
206+
}
197207
f << R"(<image width="1" height="1" preserveAspectRatio="none" transform="matrix()"
198208
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << round2(m.e)
199209
<< ',' << round2(m.f) << ")\"";
210+
f << " href=\"" << file_to_url(image.data, image.mime) << "\"/>";
200211
if (!clip_id.empty()) {
201-
f << " clip-path=\"url(#" << clip_id << ")\"";
212+
f << "</g>";
202213
}
203-
f << " href=\"" << file_to_url(image.data, image.mime) << "\"/>";
204214
return std::move(f).str();
205215
}
206216

@@ -621,7 +631,29 @@ class HtmlServiceImpl final : public HtmlService {
621631
// Tr 3 (invisible) and Tr 7 (clip-only) paint nothing; keep them
622632
// selectable via the transparent `.i` class.
623633
const bool invisible =
624-
text.rendering_mode == 3 || text.rendering_mode == 7;
634+
text.rendering_mode == pdf::TextRenderingMode::invisible ||
635+
text.rendering_mode == pdf::TextRenderingMode::clip;
636+
637+
// The run's visible paint colour, folded onto the visible span as an
638+
// interned colour class — but only when it is not the default black, so
639+
// the overwhelmingly common black run adds nothing. The per-font
640+
// `.fvN`/`.gvN` classes declare `color:#000`; this class is emitted
641+
// after them in <head> (equal specificity), so it overrides. Invisible
642+
// runs (Tr 3/7) stay transparent via `.i`, so they take no colour
643+
// class. The fill modes paint with the non-stroking colour, the
644+
// stroke-only modes (Tr 1/5) with the stroking colour.
645+
std::string color_suffix;
646+
if (!invisible) {
647+
const pdf::GraphicsState::Color &paint =
648+
(text.rendering_mode == pdf::TextRenderingMode::stroke ||
649+
text.rendering_mode == pdf::TextRenderingMode::stroke_clip)
650+
? text.stroke_color
651+
: text.fill_color;
652+
if (std::string css = device_color_to_css(paint);
653+
css != "rgb(0,0,0)") {
654+
color_suffix = ' ' + styles.intern("k", "color:" + std::move(css));
655+
}
656+
}
625657

626658
// Placement and spacing are shared by both layers of a run; build them
627659
// once on `base`.
@@ -740,17 +772,18 @@ class HtmlServiceImpl final : public HtmlService {
740772
std::string classes = std::move(base);
741773
classes += ' ';
742774
classes += font_class(font, invisible, /*nested=*/false);
775+
classes += color_suffix;
743776
page_out.items.push_back(
744777
SpanOut{std::move(classes), escape_text(text.text), {}, {}});
745778
} else {
746779
// Dual layer (a glyph lost its scalar to an earlier one): a
747780
// transparent selectable Unicode span with the PUA glyph layer
748781
// nested inside, the latter folded into the combined `.gvN` /
749-
// `.giN` class.
750-
page_out.items.push_back(
751-
SpanOut{base + " i", escape_text(text.text),
752-
font_class(font, invisible, /*nested=*/true),
753-
escape_text(glyph_run(*text.font, text.codes))});
782+
// `.giN` class. The colour rides the visible (nested) layer.
783+
page_out.items.push_back(SpanOut{
784+
base + " i", escape_text(text.text),
785+
font_class(font, invisible, /*nested=*/true) + color_suffix,
786+
escape_text(glyph_run(*text.font, text.codes))});
754787
}
755788
} else if (font != 0) {
756789
// The visible glyph layer: PUA code points in the embedded font,
@@ -763,16 +796,17 @@ class HtmlServiceImpl final : public HtmlService {
763796
// Unicode (for copy/search) with the glyph layer nested inside.
764797
// The nested child overlays the run origin and inherits the
765798
// placement via the combined `.gvN` / `.giN` class.
766-
page_out.items.push_back(
767-
SpanOut{base + " i", escape_text(text.text),
768-
font_class(font, invisible, /*nested=*/true),
769-
std::move(glyph_text)});
799+
page_out.items.push_back(SpanOut{
800+
base + " i", escape_text(text.text),
801+
font_class(font, invisible, /*nested=*/true) + color_suffix,
802+
std::move(glyph_text)});
770803
} else {
771804
// Display-only run: nothing is extractable (the `no_unicode` case),
772805
// so the glyph layer stands alone and carries the placement itself
773806
// (`base`), `.g` (unselectable) and the combined paint+font class.
774807
std::string glyph_classes = base + " g ";
775808
glyph_classes += font_class(font, invisible, /*nested=*/false);
809+
glyph_classes += color_suffix;
776810
page_out.items.push_back(SpanOut{
777811
std::move(glyph_classes), std::move(glyph_text), {}, {}});
778812
}
@@ -783,6 +817,7 @@ class HtmlServiceImpl final : public HtmlService {
783817
if (invisible) {
784818
classes += " i";
785819
}
820+
classes += color_suffix;
786821
page_out.items.push_back(
787822
SpanOut{std::move(classes), escape_text(text.text), {}, {}});
788823
}

src/odr/internal/pdf/pdf_graphics_state.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ void GraphicsState::execute(const GraphicsOperator &op) {
250250
break;
251251
case GraphicsOperatorType::set_text_rendering_mode:
252252
current().text.rendering_mode =
253-
static_cast<int>(op.arguments.at(0).as_integer());
253+
static_cast<TextRenderingMode>(op.arguments.at(0).as_integer());
254254
break;
255255
case GraphicsOperatorType::set_text_rise:
256256
current().text.rise = op.arguments.at(0).as_real();

src/odr/internal/pdf/pdf_graphics_state.hpp

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,21 @@ enum class ColorSpace {
1818
device_cmyk,
1919
};
2020

21+
/// Text rendering mode (`Tr`, ISO 32000-1 Table 106): how shown glyphs are
22+
/// painted and/or added to the clipping path. The low two bits select the paint
23+
/// (fill / stroke / both / none); the `_clip` modes additionally add the glyph
24+
/// outlines to the clip. The integer values are the `Tr` operands.
25+
enum class TextRenderingMode {
26+
fill = 0,
27+
stroke = 1,
28+
fill_stroke = 2,
29+
invisible = 3,
30+
fill_clip = 4,
31+
stroke_clip = 5,
32+
fill_stroke_clip = 6,
33+
clip = 7,
34+
};
35+
2136
/// One segment of a subpath, in user space (the CTM is already applied at
2237
/// construction time, ISO 32000-1 8.5.2.1). A line carries only `end`; a cubic
2338
/// Bézier carries its two control points as well.
@@ -72,16 +87,16 @@ struct GraphicsState {
7287
};
7388

7489
struct Text {
75-
double char_spacing{0}; // Tc
76-
double word_spacing{0}; // Tw
77-
double horizontal_scaling{100}; // Tz, in percent (100 = normal)
78-
double leading{0}; // TL
79-
std::string font; // Tf resource name
80-
double size{}; // Tf size
81-
int rendering_mode{0}; // Tr
82-
double rise{0}; // Ts
83-
util::math::Transform2D matrix; // Tm
84-
util::math::Transform2D line_matrix; // Tlm
90+
double char_spacing{0}; // Tc
91+
double word_spacing{0}; // Tw
92+
double horizontal_scaling{100}; // Tz, in percent (100 = normal)
93+
double leading{0}; // TL
94+
std::string font; // Tf resource name
95+
double size{}; // Tf size
96+
TextRenderingMode rendering_mode{TextRenderingMode::fill}; // Tr
97+
double rise{0}; // Ts
98+
util::math::Transform2D matrix; // Tm
99+
util::math::Transform2D line_matrix; // Tlm
85100
std::array<double, 2> glyph_width{};
86101
std::array<double, 4> glyph_bounding_box{};
87102
};

src/odr/internal/pdf/pdf_page_element.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,18 @@ struct TextElement {
2121
util::math::Transform2D transform;
2222
/// Resolved font, or `nullptr` when the `/Font` resource name was unknown.
2323
Font *font{nullptr};
24-
double size{0}; // Tf size
25-
double char_spacing{0}; // Tc
26-
double word_spacing{0}; // Tw
27-
double horizontal_scaling{100}; // Tz, percent
28-
double rise{0}; // Ts
29-
int rendering_mode{0}; // Tr
24+
double size{0}; // Tf size
25+
double char_spacing{0}; // Tc
26+
double word_spacing{0}; // Tw
27+
double horizontal_scaling{100}; // Tz, percent
28+
double rise{0}; // Ts
29+
TextRenderingMode rendering_mode{TextRenderingMode::fill}; // Tr
30+
/// Non-stroking (fill) and stroking colours in force when the run was shown,
31+
/// as device colours. The renderer paints the run in whichever its rendering
32+
/// mode selects — the fill colour for the common fill modes, the stroking
33+
/// colour for the stroke-only modes (Tr 1/5) — defaulting to black.
34+
GraphicsState::Color fill_color;
35+
GraphicsState::Color stroke_color;
3036
/// Raw character codes shown by this segment (one `Tj`, or one string of a
3137
/// `TJ` array).
3238
std::string codes;

src/odr/internal/pdf/pdf_page_extractor.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ void show(std::vector<PageElement> &out, GraphicsState &state,
270270
element.horizontal_scaling = text.horizontal_scaling;
271271
element.rise = text.rise;
272272
element.rendering_mode = text.rendering_mode;
273+
element.fill_color = state.current().other_color;
274+
element.stroke_color = state.current().stroke_color;
273275
ResolvedText resolved = resolve_text(marked, font, codes);
274276
element.text = std::move(resolved.text);
275277
element.no_unicode = resolved.no_unicode;

test/src/internal/pdf/pdf_page_extractor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ TEST(PdfPageExtractor, form_xobject_self_cycle_terminates) {
337337
TEST(PdfPageExtractor, rendering_mode_propagates) {
338338
const auto texts = run("BT /F1 10 Tf 3 Tr 0 0 Td (x) Tj ET");
339339
ASSERT_EQ(texts.size(), 1);
340-
EXPECT_EQ(texts[0].rendering_mode, 3);
340+
EXPECT_EQ(texts[0].rendering_mode, TextRenderingMode::invisible);
341341
}
342342

343343
// A composite font with no `/ToUnicode` and no usable predefined encoding has

0 commit comments

Comments
 (0)