Skip to content

Commit 2d8bb8f

Browse files
Matthew HoroszowskiMatthew Horoszowski
authored and committed
feat(custom_xml): add string-blob helpers and integration tests
Phase 4 of customXml support per Plans/customxml-implementation-plan.md. String-blob helpers (plan section 2.3) -------------------------------------- - CustomXmlParts.add_string_blob(name, content, *, mime_hint, encoding, scope) wraps a string payload in a <blob xmlns='urn:python-pptx:blob' name=... encoding=text|base64 mime=...> envelope, then attaches it via add(...). Caller pre-encodes binary content; helper does NOT auto-base64. - CustomXmlParts.read_string_blob(name) reverse-resolves by name and returns the envelope text, or None if absent / not-our-envelope. - CustomXmlParts.blob_encoding(name) for callers mixing text and base64 blobs that need the original encoding to decode on read. Integration fixtures -------------------- Four synthetic .pptx files generated by our own API live at tests/test_files/customxml/: - presentation-scoped.pptx — Office.js / Word default topology - package-scoped.pptx — VSTO / SharePoint topology - multipart.pptx — every Phase 1-4 surface in one file - clean.pptx — regression baseline; no customXml at all The generation script at tests/test_files/customxml/_generate_fixtures.py re-creates them deterministically (GUIDs pinned). README.rst documents what each fixture exercises and notes that real third-party fixtures from SharePoint/Office.js/VSTO will land later during the manual PowerPoint UI matrix in plan section 5.4. Integration tests (tests/integration/test_customxml_roundtrip.py) load each fixture, exercise the public API against it, save through BytesIO, reload, and assert state survives. Covers payload preservation, GUID/schema_refs preservation, scope-topology preservation through save, mutation + round-trip, removal, core_properties coexistence, and the 'no customXml at all' regression case. Counts ------ - 10 new string-blob unit tests in tests/test_custom_xml.py - 20 new integration tests in tests/integration/test_customxml_roundtrip.py - Total tests now: 2986 passed (2956 baseline + 30 new)
1 parent aaaf181 commit 2d8bb8f

10 files changed

Lines changed: 465 additions & 0 deletions

File tree

src/pptx/custom_xml.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
# `{prefix}{datastore_item_id}` and the value is the user-assigned name.
2828
NAME_PROPERTY_PREFIX = "_pptx_customxml_name_"
2929

30+
# Reserved namespace for the string-blob envelope written by `add_string_blob`.
31+
# Read back through `read_string_blob` only — callers using `add(...)` directly
32+
# should pick their own namespace, not this one.
33+
BLOB_NAMESPACE = "urn:python-pptx:blob"
34+
3035

3136
class CustomXmlParts(Sequence[CustomXmlPart]):
3237
"""Collection of customXml data parts attached to the presentation.
@@ -145,6 +150,86 @@ def add(
145150

146151
return data_part
147152

153+
def add_string_blob(
    self,
    name: str,
    content: str,
    *,
    mime_hint: str | None = None,
    encoding: Literal["text", "base64"] = "text",
    scope: Literal["presentation", "package"] = "presentation",
) -> CustomXmlPart:
    """Embed a string payload as a customXml part.

    Wraps `content` in a one-element XML envelope under the reserved
    `urn:python-pptx:blob` namespace::

        <blob xmlns="urn:python-pptx:blob"
              name="…" mime="…" encoding="text|base64">…</blob>

    For binary or non-XML-safe text, set ``encoding="base64"`` and pass
    already-encoded `content` — the helper does NOT encode for you. Read
    back via :meth:`read_string_blob`.

    `name` is written to the envelope's ``name`` attribute and is also the
    name handed to :meth:`add`, which is what :meth:`read_string_blob` later
    looks the part up by.

    `mime_hint` is stored as the ``mime`` attribute on the envelope and
    round-trips for the caller's reference; it has no effect on PowerPoint.
    The attribute is omitted entirely when `mime_hint` is `None`.

    Returns the created :class:`CustomXmlPart`. Already attached at the
    chosen scope; nothing else is needed before ``prs.save(...)``.

    Raises |ValueError| when `name` is not a non-empty string, when
    `encoding` is not ``"text"`` or ``"base64"``, or when `content` cannot
    be stored as XML text (e.g. it contains NUL bytes or control
    characters). Raises |TypeError| when `content` is not a `str`.
    """
    if not isinstance(name, str) or not name:
        raise ValueError("name must be a non-empty string")
    if not isinstance(content, str):  # pyright: ignore[reportUnnecessaryIsInstance]
        raise TypeError("content must be str, got %s" % type(content).__name__)
    if encoding not in ("text", "base64"):
        raise ValueError("encoding must be 'text' or 'base64', got %r" % (encoding,))

    from lxml import etree

    # -- default-namespace declaration keeps the serialized element prefix-free --
    envelope = etree.Element("{%s}blob" % BLOB_NAMESPACE, nsmap={None: BLOB_NAMESPACE})
    envelope.set("name", name)
    envelope.set("encoding", encoding)
    if mime_hint is not None:
        envelope.set("mime", mime_hint)

    # -- lxml rejects text that is not XML-compatible with a terse ValueError; keep the
    # -- exception type but tell the caller what the documented way out is.
    try:
        envelope.text = content
    except ValueError as exc:
        raise ValueError(
            "content cannot be stored as XML text (NUL bytes or control characters?);"
            " base64-encode it and pass encoding='base64'"
        ) from exc

    return self.add(envelope, name=name, scope=scope)
199+
200+
def read_string_blob(self, name: str) -> str | None:
    """Return the text stored in the string-blob part called `name`.

    The part is looked up with :meth:`by_name`. `None` is returned when no
    part has that name, and also when the part found is not rooted in a
    `urn:python-pptx:blob` ``blob`` element (meaning it was not written by
    the string-blob helper). An envelope with no text yields the empty
    string rather than `None`.

    The text is returned exactly as stored: a blob written with
    ``encoding="base64"`` comes back still base64-encoded and the caller is
    responsible for decoding it. Use :meth:`blob_encoding` to find out which
    encoding the blob was written with.
    """
    blob_part = self.by_name(name)
    if blob_part is not None:
        envelope = blob_part.element
        if envelope.tag == "{%s}blob" % BLOB_NAMESPACE:
            payload = envelope.text
            # -- an empty envelope has `.text` of None; report that as "" --
            return payload if payload else ""
    return None
218+
219+
def blob_encoding(self, name: str) -> str | None:
    """Return the ``encoding`` attribute of the string-blob part called `name`.

    `None` is returned when no part has that name or when the part found is
    not rooted in a `urn:python-pptx:blob` ``blob`` element. Otherwise the
    value of the envelope's ``encoding`` attribute is returned as stored.

    Intended for callers that keep both text and base64 blobs and need to
    know which ones to decode after :meth:`read_string_blob`.
    """
    blob_part = self.by_name(name)
    if blob_part is not None:
        envelope = blob_part.element
        if envelope.tag == "{%s}blob" % BLOB_NAMESPACE:
            return envelope.get("encoding")
    return None
232+
148233
def remove(self, part: Union[CustomXmlPart, int, str]) -> None:
149234
"""Remove a customXml part from the presentation.
150235

tests/integration/__init__.py

Whitespace-only changes.
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
# pyright: reportPrivateUsage=false
2+
3+
"""Integration test suite for customXml round-trip.
4+
5+
Loads each synthetic fixture under ``tests/test_files/customxml/``, exercises
6+
the public API against it, saves to a fresh BytesIO, reloads, and asserts the
7+
state survived.
8+
9+
Real third-party fixtures (SharePoint-saved, Office.js-produced, VSTO-tooled)
10+
will land later under ``sharepoint-saved.pptx`` etc. once captured during the
11+
manual PowerPoint UI matrix in ``Plans/customxml-implementation-plan.md`` §5.4.
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import os
17+
from io import BytesIO
18+
19+
import pytest
20+
21+
from pptx import Presentation
22+
from pptx.opc.constants import RELATIONSHIP_TYPE as RT
23+
from pptx.parts.custom_xml import CustomXmlPart
24+
25+
26+
_FIXTURE_DIR = os.path.join(
27+
os.path.dirname(os.path.abspath(__file__)),
28+
os.pardir,
29+
"test_files",
30+
"customxml",
31+
)
32+
33+
34+
def _fixture(name: str) -> str:
    """Return the path of the customXml fixture file called `name`."""
    fixture_path = os.path.join(_FIXTURE_DIR, name)
    return fixture_path
36+
37+
38+
def _roundtrip(prs):
    """Save `prs` to an in-memory stream and return the presentation loaded back from it."""
    stream = BytesIO()
    prs.save(stream)
    # -- rewind so the loader reads from the start of what was just written --
    stream.seek(0)
    reloaded = Presentation(stream)
    return reloaded
43+
44+
45+
class DescribePresentationScopedFixture:
    """Checks against `presentation-scoped.pptx`, which carries one customXml part."""

    def it_loads_the_part(self):
        parts = Presentation(_fixture("presentation-scoped.pptx")).custom_xml_parts
        assert len(parts) == 1

    def it_upgrades_loaded_part_to_CustomXmlPart_class(self):
        parts = Presentation(_fixture("presentation-scoped.pptx")).custom_xml_parts
        first_part = parts[0]
        assert isinstance(first_part, CustomXmlPart)

    def it_preserves_the_payload(self):
        parts = Presentation(_fixture("presentation-scoped.pptx")).custom_xml_parts
        provenance = parts.by_name("provenance")
        assert provenance is not None
        assert provenance.element.tag == "{urn:my:provenance}provenance"
        source_elm = provenance.element.find("{urn:my:provenance}source")
        assert source_elm is not None
        assert source_elm.text == "integration-fixture"

    def it_preserves_the_pinned_guid(self):
        parts = Presentation(_fixture("presentation-scoped.pptx")).custom_xml_parts
        first_part = parts[0]
        assert first_part.datastore_item_id == "{1A2B3C4D-5E6F-7890-ABCD-EF1234567890}"

    def it_preserves_the_schema_refs(self):
        parts = Presentation(_fixture("presentation-scoped.pptx")).custom_xml_parts
        first_part = parts[0]
        assert first_part.schema_refs == ("urn:my:provenance",)

    def it_preserves_the_presentation_scope_through_save(self):
        reloaded = _roundtrip(Presentation(_fixture("presentation-scoped.pptx")))
        # -- the customXml relationship must hang off the presentation part only --
        presentation_level = {rel.reltype for rel in reloaded.part.rels.values()}
        package_level = {rel.reltype for rel in reloaded.part.package._rels.values()}
        assert RT.CUSTOM_XML in presentation_level
        assert RT.CUSTOM_XML not in package_level
81+
82+
83+
class DescribePackageScopedFixture:
    """Checks against `package-scoped.pptx`, which carries one customXml part."""

    def it_loads_the_part(self):
        parts = Presentation(_fixture("package-scoped.pptx")).custom_xml_parts
        assert len(parts) == 1

    def it_preserves_the_payload(self):
        parts = Presentation(_fixture("package-scoped.pptx")).custom_xml_parts
        vsto = parts.by_name("vsto")
        assert vsto is not None
        assert vsto.element.tag == "{urn:my:vsto}vsto-config"

    def it_preserves_the_package_scope_through_save(self):
        reloaded = _roundtrip(Presentation(_fixture("package-scoped.pptx")))
        # -- the customXml relationship must hang off the package only --
        presentation_level = {rel.reltype for rel in reloaded.part.rels.values()}
        package_level = {rel.reltype for rel in reloaded.part.package._rels.values()}
        assert RT.CUSTOM_XML in package_level
        assert RT.CUSTOM_XML not in presentation_level

    def it_preserves_the_pinned_guid(self):
        parts = Presentation(_fixture("package-scoped.pptx")).custom_xml_parts
        first_part = parts[0]
        assert first_part.datastore_item_id == "{ABCDEF12-3456-7890-ABCD-EF1234567890}"
106+
107+
108+
class DescribeMultipartFixture:
    """Checks against `multipart.pptx`: custom properties plus three named customXml parts."""

    def it_loads_three_customxml_parts_at_mixed_scopes(self):
        prs = Presentation(_fixture("multipart.pptx"))
        # -- provenance + extra + the "readme" string blob --
        assert len(prs.custom_xml_parts) == 3

    def it_preserves_custom_document_properties(self):
        prs = Presentation(_fixture("multipart.pptx"))
        assert prs.custom_properties["Source"] == "deck-builder-cli@1.4.2"
        assert prs.custom_properties["BuildNumber"] == 42
        assert prs.custom_properties["IsDraft"] is True

    def it_finds_each_part_by_name(self):
        prs = Presentation(_fixture("multipart.pptx"))
        assert prs.custom_xml_parts.by_name("provenance") is not None
        assert prs.custom_xml_parts.by_name("extra") is not None
        assert prs.custom_xml_parts.by_name("readme") is not None

    def it_round_trips_through_save_load_with_mutations(self):
        prs = Presentation(_fixture("multipart.pptx"))
        # -- mutate something in each layer --
        prs.custom_properties["NewKey"] = "added"
        provenance = prs.custom_xml_parts.by_name("provenance")
        # -- fail here with a clear assertion rather than an AttributeError on None --
        assert provenance is not None
        provenance.add_item("added-by-test", "value")

        reloaded = _roundtrip(prs)

        assert reloaded.custom_properties["NewKey"] == "added"
        assert reloaded.custom_properties["Source"] == "deck-builder-cli@1.4.2"
        prov = reloaded.custom_xml_parts.by_name("provenance")
        assert prov is not None
        # -- the added child element survived the round-trip --
        added = [c for c in prov.element if c.tag.endswith("added-by-test")]
        assert len(added) == 1
        assert added[0].text == "value"

    def it_round_trips_the_string_blob_helper(self):
        prs = Presentation(_fixture("multipart.pptx"))
        # -- check the blob both as loaded from the fixture and after a save/reload, so
        # -- the test actually exercises the round-trip its name promises.
        for deck in (prs, _roundtrip(prs)):
            content = deck.custom_xml_parts.read_string_blob("readme")
            assert content is not None
            assert "# Hello" in content
            assert "markdown content" in content

    def it_remove_then_save_drops_the_part(self):
        prs = Presentation(_fixture("multipart.pptx"))
        provenance = prs.custom_xml_parts.by_name("provenance")
        # -- guard against a vacuous pass if the fixture ever loses this part --
        assert provenance is not None
        prs.custom_xml_parts.remove(provenance)
        reloaded = _roundtrip(prs)
        assert reloaded.custom_xml_parts.by_name("provenance") is None
        # -- other parts still present --
        assert reloaded.custom_xml_parts.by_name("extra") is not None
        assert reloaded.custom_xml_parts.by_name("readme") is not None
160+
161+
162+
class DescribeCleanFixture:
    """Regression baseline: `clean.pptx` has no customXml and must not gain related rels."""

    def it_has_no_customxml_parts(self):
        parts = Presentation(_fixture("clean.pptx")).custom_xml_parts
        assert len(parts) == 0

    def it_round_trips_with_no_rels_added(self):
        # -- save and reload untouched; nothing customXml-related may appear --
        reloaded = _roundtrip(Presentation(_fixture("clean.pptx")))
        presentation_level = {rel.reltype for rel in reloaded.part.rels.values()}
        package_level = {rel.reltype for rel in reloaded.part.package._rels.values()}
        assert RT.CUSTOM_XML not in presentation_level
        assert RT.CUSTOM_XML not in package_level
        assert RT.CUSTOM_PROPERTIES not in package_level

    def it_can_have_customxml_added_after_loading(self):
        deck = Presentation(_fixture("clean.pptx"))
        deck.custom_xml_parts.add(b'<after-load xmlns="u:al"/>', name="after-load")
        reloaded_parts = _roundtrip(deck).custom_xml_parts
        after_load = reloaded_parts.by_name("after-load")
        assert after_load is not None
        assert after_load.element.tag == "{u:al}after-load"
189+
190+
191+
class DescribeCoreAndCustomCoexistence:
    """Core-property edits must not disturb custom properties or customXml parts."""

    def it_preserves_core_properties_alongside_custom_ones(self):
        deck = Presentation(_fixture("multipart.pptx"))
        core = deck.core_properties
        core.author = "Athena"
        core.subject = "Integration test"

        reloaded = _roundtrip(deck)

        reloaded_core = reloaded.core_properties
        assert reloaded_core.author == "Athena"
        assert reloaded_core.subject == "Integration test"
        # -- custom properties still intact --
        assert reloaded.custom_properties["Source"] == "deck-builder-cli@1.4.2"
        # -- customXml parts still intact --
        assert len(reloaded.custom_xml_parts) == 3

tests/test_custom_xml.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,67 @@ def it_supports_add_item_convenience(self, empty_prs):
231231
assert children[1].get("priority") == "high"
232232

233233

234+
class DescribeCustomXmlParts_string_blob:
    """Unit tests for `add_string_blob`, `read_string_blob` and `blob_encoding`."""

    def it_adds_a_string_blob(self, empty_prs):
        blob_part = empty_prs.custom_xml_parts.add_string_blob(
            "readme", "# Hello\nworld", mime_hint="text/markdown"
        )
        assert isinstance(blob_part, CustomXmlPart)
        envelope = blob_part.element
        assert envelope.tag == "{urn:python-pptx:blob}blob"
        assert envelope.get("name") == "readme"
        assert envelope.get("mime") == "text/markdown"
        assert envelope.get("encoding") == "text"
        assert envelope.text == "# Hello\nworld"

    def it_reads_back_a_string_blob_by_name(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        parts.add_string_blob("note", "secret message")
        assert parts.read_string_blob("note") == "secret message"

    def it_returns_None_for_missing_blob(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        assert parts.read_string_blob("missing") is None

    def it_returns_None_for_a_non_blob_part(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        parts.add(b'<other xmlns="u:o"/>', name="other")
        # -- the name resolves to a part, but its root is not the blob envelope --
        assert parts.read_string_blob("other") is None
        assert parts.blob_encoding("other") is None

    def it_round_trips_a_string_blob(self, empty_prs):
        empty_prs.custom_xml_parts.add_string_blob("md", "content")
        reloaded_parts = _roundtrip(empty_prs).custom_xml_parts
        assert reloaded_parts.read_string_blob("md") == "content"
        assert reloaded_parts.blob_encoding("md") == "text"

    def it_supports_base64_encoding(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        encoded = "aGVsbG8gd29ybGQ="  # b64 of "hello world"
        parts.add_string_blob("bin", encoded, encoding="base64")
        # -- the helper stores and returns the payload still encoded --
        assert parts.read_string_blob("bin") == encoded
        assert parts.blob_encoding("bin") == "base64"

    def it_rejects_empty_name(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        with pytest.raises(ValueError):
            parts.add_string_blob("", "content")

    def it_rejects_non_string_content(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        with pytest.raises(TypeError):
            parts.add_string_blob("x", 42)  # type: ignore[arg-type]

    def it_rejects_unknown_encoding(self, empty_prs):
        parts = empty_prs.custom_xml_parts
        with pytest.raises(ValueError):
            parts.add_string_blob(
                "x", "content", encoding="utf-7"  # type: ignore[arg-type]
            )

    def it_supports_package_scope(self, empty_prs):
        from pptx.opc.constants import RELATIONSHIP_TYPE as RT_

        empty_prs.custom_xml_parts.add_string_blob("x", "content", scope="package")
        package_level = {rel.reltype for rel in empty_prs.part.package._rels.values()}
        assert RT_.CUSTOM_XML in package_level
293+
294+
234295
class DescribeCustomXmlPart_name_edge_cases:
235296
def it_returns_None_when_no_name_property_for_the_guid(self, empty_prs):
236297
# Add a part WITHOUT a name. .name should return None even though the

0 commit comments

Comments
 (0)