Skip to content

Commit a8cac32

Browse files
Matthew Horoszowski
authored and committed
feat(parts): add CustomPropertiesPart and CustomXmlPart subclasses
Phase 2 of customXml support per Plans/customxml-implementation-plan.md.

Three new XmlPart subclasses, two registered with PartFactory:

- CustomPropertiesPart -> /docProps/custom.xml
  Owns CT_Properties; default() factory; add/get/remove/property_names delegators plus __contains__/__iter__/__len__ for the Phase-3 wrapper to compose around. Registered against CT.OFC_CUSTOM_PROPERTIES.

- CustomXmlPropertiesPart -> /customXml/itemPropsN.xml
  Owns CT_DatastoreItem; new() factory; datastore_item_id and schema_refs accessors. Registered against CT.OFC_CUSTOM_XML_PROPERTIES.

- CustomXmlPart -> /customXml/itemN.xml
  Owns the caller's arbitrary XML payload. new_pair() atomically creates both the data part and its CustomXmlPropertiesPart sibling, allocates matching N indices, generates a {GUID} if datastore_item_id is omitted, and wires the RT.CUSTOM_XML_PROPS rel from data to props. Exposes element/blob/replace_xml plus pass-through accessors that delegate to the props part. INTENTIONALLY not registered against CT.XML per plan section 3.6 — Phase 3 will wrap loaded base Part instances on enumeration. Anti-comment in __init__.py marks the deferred mapping.

38 new unit tests; 100% / 96% line coverage on the new modules. Existing 2859-test suite still green (2897 total).
1 parent 9ad27ba commit a8cac32

5 files changed

Lines changed: 698 additions & 0 deletions

File tree

src/pptx/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from pptx.opc.package import PartFactory
1212
from pptx.parts.chart import ChartPart
1313
from pptx.parts.coreprops import CorePropertiesPart
14+
from pptx.parts.custom_properties import CustomPropertiesPart
15+
from pptx.parts.custom_xml import CustomXmlPropertiesPart
1416
from pptx.parts.image import ImagePart
1517
from pptx.parts.media import MediaPart
1618
from pptx.parts.presentation import PresentationPart
@@ -38,6 +40,12 @@
3840
CT.PML_TEMPLATE_MAIN: PresentationPart,
3941
CT.PML_SLIDESHOW_MAIN: PresentationPart,
4042
CT.OPC_CORE_PROPERTIES: CorePropertiesPart,
43+
CT.OFC_CUSTOM_PROPERTIES: CustomPropertiesPart,
44+
CT.OFC_CUSTOM_XML_PROPERTIES: CustomXmlPropertiesPart,
45+
# NOTE: CT.XML is intentionally NOT mapped to CustomXmlPart — see
46+
# `Plans/customxml-implementation-plan.md` §3.6. The Phase-3
47+
# `CustomXmlParts` collection wraps loaded base `Part` instances
48+
# at enumeration time.
4149
CT.PML_NOTES_MASTER: NotesMasterPart,
4250
CT.PML_NOTES_SLIDE: NotesSlidePart,
4351
CT.PML_SLIDE: SlidePart,
@@ -71,6 +79,8 @@
7179
del (
7280
ChartPart,
7381
CorePropertiesPart,
82+
CustomPropertiesPart,
83+
CustomXmlPropertiesPart,
7484
ImagePart,
7585
MediaPart,
7686
SlidePart,
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Custom Document Properties part — `/docProps/custom.xml`."""
2+
3+
from __future__ import annotations
4+
5+
from typing import TYPE_CHECKING, Iterator
6+
7+
from pptx.opc.constants import CONTENT_TYPE as CT
8+
from pptx.opc.package import XmlPart
9+
from pptx.opc.packuri import PackURI
10+
from pptx.oxml.custom_properties import CT_Properties, CT_Property
11+
12+
if TYPE_CHECKING:
13+
from pptx.package import Package
14+
15+
16+
class CustomPropertiesPart(XmlPart):
    """Package part stored at `/docProps/custom.xml`.

    This part owns the XML for the package's custom (user-defined) document
    properties, i.e. the values PowerPoint shows under
    `File → Properties → Advanced`. It is deliberately thin: every method
    forwards to the `CT_Properties` root element. The user-facing Mapping
    wrapper, `pptx.custom_properties.CustomProperties` (Phase 3), is meant to
    compose around these delegators.
    """

    _element: CT_Properties

    @classmethod
    def default(cls, package: "Package") -> "CustomPropertiesPart":
        """Build an empty `CustomPropertiesPart` for `package`.

        Intended as the seed part for a presentation that has no custom
        properties part yet. The new part carries no properties; populate it
        with `add_property(...)`.
        """
        partname = PackURI("/docProps/custom.xml")
        content_type = CT.OFC_CUSTOM_PROPERTIES
        root = CT_Properties.new_properties()
        return cls(partname, content_type, package, root)

    def add_property(self, name: str, value: object) -> CT_Property:
        """Create an `<op:property>` for `(name, value)` and return that element."""
        new_property = self._element.add_property(name, value)
        return new_property

    def get_property(self, name: str) -> CT_Property | None:
        """Look up the `<op:property>` called `name`; `None` when there is none."""
        found = self._element.get_property(name)
        return found

    def remove_property(self, name: str) -> bool:
        """Delete the `<op:property>` called `name`; True when one was found."""
        was_removed = self._element.remove_property(name)
        return was_removed

    @property
    def property_names(self) -> tuple[str, ...]:
        """Property names, as a tuple, in document order."""
        names = self._element.property_names
        return names

    def __contains__(self, name: object) -> bool:
        # Only strings can be property names; anything else is simply absent.
        if not isinstance(name, str):
            return False
        return self._element.get_property(name) is not None

    def __iter__(self) -> Iterator[str]:
        names = self._element.property_names
        return iter(names)

    def __len__(self) -> int:
        properties = self._element.property_lst
        return len(properties)

src/pptx/parts/custom_xml.py

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
"""customXml data parts and their itemProps siblings.
2+
3+
Two part subclasses living together in this module because they are an atomic
4+
pair — a `CustomXmlPart` is meaningless without its `CustomXmlPropertiesPart`
5+
sibling, and vice versa. Both are created by `CustomXmlPart.new_pair(...)`.
6+
7+
Schema references: ECMA-376 Part 1, §15.2.4 (Custom XML Data Storage Part).
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import uuid
13+
from typing import TYPE_CHECKING, Iterable, Union, cast
14+
15+
from lxml.etree import _Element # pyright: ignore[reportPrivateUsage]
16+
17+
from pptx.opc.constants import CONTENT_TYPE as CT
18+
from pptx.opc.constants import RELATIONSHIP_TYPE as RT
19+
from pptx.opc.package import XmlPart
20+
from pptx.opc.packuri import PackURI
21+
from pptx.oxml import parse_xml
22+
from pptx.oxml.custom_xml import CT_DatastoreItem
23+
from pptx.oxml.xmlchemy import BaseOxmlElement
24+
25+
if TYPE_CHECKING:
26+
from pptx.package import Package
27+
28+
29+
XmlPayload = Union[bytes, str, _Element]
30+
31+
32+
class CustomXmlPropertiesPart(XmlPart):
    """Package part stored at `/customXml/itemPropsN.xml`.

    Its root element holds the `datastoreItem` GUID that identifies the
    sibling `CustomXmlPart` across edits, together with an optional list of
    `<ds:schemaRef>` URIs the data part claims to conform to.
    """

    _element: CT_DatastoreItem

    @classmethod
    def new(
        cls,
        package: "Package",
        partname: PackURI,
        datastore_item_id: str,
        schema_refs: Iterable[str] = (),
    ) -> "CustomXmlPropertiesPart":
        """Build a new `CustomXmlPropertiesPart` named `partname` in `package`.

        The root element is created from `datastore_item_id` and `schema_refs`.
        """
        root = CT_DatastoreItem.new(datastore_item_id, schema_refs=schema_refs)
        content_type = CT.OFC_CUSTOM_XML_PROPERTIES
        return cls(partname, content_type, package, root)

    @property
    def datastore_item_id(self) -> str:
        """Value of the `ds:itemID` attribute, a GUID such as `"{1A2B...}"`."""
        item_id = self._element.itemID
        return item_id

    @datastore_item_id.setter
    def datastore_item_id(self, value: str) -> None:
        root = self._element
        root.itemID = value

    @property
    def schema_refs(self) -> tuple[str, ...]:
        """The `<ds:schemaRef ds:uri>` values, as a tuple, in document order."""
        uris = self._element.schema_ref_uris
        return uris

    def add_schema_ref(self, uri: str) -> None:
        """Add a `<ds:schemaRef ds:uri="...">` entry (idempotent on `uri`)."""
        root = self._element
        root.add_schema_ref(uri)

    def remove_schema_ref(self, uri: str) -> bool:
        """Drop the schemaRef whose URI is `uri`; True when one was found."""
        was_removed = self._element.remove_schema_ref(uri)
        return was_removed
75+
76+
77+
class CustomXmlPart(XmlPart):
    """Package part stored at `/customXml/itemN.xml`.

    The payload is whatever XML the caller supplies; its root element name and
    namespaces are caller-defined and `python-pptx` imposes no schema on it.
    Every `CustomXmlPart` is paired with a `CustomXmlPropertiesPart` that
    carries the `datastoreItem` GUID, reached through a relationship of type
    `RT.CUSTOM_XML_PROPS`.

    NOTE: This class is intentionally **not** registered with `PartFactory`
    against `CT.XML`. Loaded `application/xml` parts are produced as base
    `Part` instances, and the Phase-3 `CustomXmlParts` collection upgrades
    them on enumeration. See `Plans/customxml-implementation-plan.md` §3.6.
    """

    @classmethod
    def new_pair(
        cls,
        package: "Package",
        xml_payload: XmlPayload,
        *,
        datastore_item_id: str | None = None,
        schema_refs: Iterable[str] = (),
    ) -> "CustomXmlPart":
        """Build a data part and its props part together; return the data part.

        `xml_payload` may be `bytes`, `str`, or an lxml `_Element`. When
        `datastore_item_id` is `None`, a new `uuid4()` is generated,
        upper-cased and wrapped in curly braces to match Office's format.

        Both partnames, `/customXml/itemN.xml` and `/customXml/itemPropsN.xml`,
        use the same `N`: the next free index among the data parts already in
        `package`.

        The data part is related to the props part via `RT.CUSTOM_XML_PROPS`.
        This method creates no relationship from any other part to either new
        part; that is left to the caller (Phase-3 `CustomXmlParts.add(...)`).
        """
        n = _next_customxml_index(package)
        data_uri = PackURI("/customXml/item%d.xml" % n)
        props_uri = PackURI("/customXml/itemProps%d.xml" % n)

        root = _parse_payload(xml_payload)
        new_data_part = cls(data_uri, CT.XML, package, root)

        item_id = datastore_item_id
        if item_id is None:
            # No caller-supplied id: mint one in Office's `{UPPERCASE-GUID}` form.
            item_id = "{%s}" % str(uuid.uuid4()).upper()

        sibling = CustomXmlPropertiesPart.new(package, props_uri, item_id, schema_refs)
        new_data_part.relate_to(sibling, RT.CUSTOM_XML_PROPS)
        return new_data_part

    @property
    def props_part(self) -> CustomXmlPropertiesPart:
        """The `CustomXmlPropertiesPart` related to this data part.

        Raises `KeyError` if the props rel is missing — a malformed package
        the caller is expected to repair via `CustomXmlPart.new_pair(...)`.
        """
        related = self.part_related_by(RT.CUSTOM_XML_PROPS)
        return cast(CustomXmlPropertiesPart, related)

    @property
    def datastore_item_id(self) -> str:
        """The GUID stored on the sibling props part."""
        sibling = self.props_part
        return sibling.datastore_item_id

    @datastore_item_id.setter
    def datastore_item_id(self, value: str) -> None:
        sibling = self.props_part
        sibling.datastore_item_id = value

    @property
    def schema_refs(self) -> tuple[str, ...]:
        """The schema-ref URIs stored on the sibling props part."""
        sibling = self.props_part
        return sibling.schema_refs

    def add_schema_ref(self, uri: str) -> None:
        """Forward `uri` to `add_schema_ref` on the sibling props part."""
        sibling = self.props_part
        sibling.add_schema_ref(uri)

    def remove_schema_ref(self, uri: str) -> bool:
        """Forward `uri` to `remove_schema_ref` on the sibling props part."""
        sibling = self.props_part
        return sibling.remove_schema_ref(uri)

    @property
    def element(self) -> BaseOxmlElement:
        """The live root element of the customXml payload.

        This is the part's own element, not a copy, so changes made to its
        children change the part; the next `package.save(...)` will serialize
        the updated tree.
        """
        root = self._element
        return root

    def replace_xml(self, xml_payload: XmlPayload) -> None:
        """Swap the whole XML payload for `xml_payload`.

        The root element is replaced outright. `datastore_item_id` and
        `schema_refs` live on the sibling props part, which this method does
        not touch.
        """
        new_root = _parse_payload(xml_payload)
        self._element = new_root
182+
183+
184+
def _parse_payload(xml_payload: XmlPayload) -> BaseOxmlElement:
    """Turn `xml_payload` into the root element for a customXml part.

    Three input kinds are accepted:

    - `str` is encoded as UTF-8 and then parsed like bytes.
    - `bytes` is handed to `parse_xml` unchanged.
    - an lxml `_Element` is returned as-is (the same object, not a copy).

    Any other type raises `TypeError`, so a bad argument fails here rather than
    somewhere downstream.
    """
    raw = xml_payload.encode("utf-8") if isinstance(xml_payload, str) else xml_payload

    if isinstance(raw, bytes):
        return cast("BaseOxmlElement", parse_xml(raw))

    if isinstance(raw, _Element):
        return cast("BaseOxmlElement", raw)

    type_name = type(xml_payload).__name__
    raise TypeError(
        "xml_payload must be bytes, str, or lxml _Element; got %s" % type_name
    )
201+
202+
203+
def _next_customxml_index(package: "Package") -> int:
    """Return the lowest free `N` (counting from 1) for `/customXml/itemN.xml`.

    Every part yielded by `package.iter_parts()` is inspected. A part counts as
    occupying index `N` only when its partname is exactly
    `/customXml/item<digits>.xml`, where `<digits>` is one or more ASCII
    digits. `itemProps*.xml` siblings and any other partname are ignored. Gaps
    are reused: if items 1 and 3 exist, 2 is returned.

    NOTE(review): only parts visible through `package.iter_parts()` are
    considered. Presumably a part that was created but is not yet reachable
    from the package is not seen — confirm against `iter_parts()`.
    """
    data_prefix = "/customXml/item"
    extension = ".xml"

    used: set[int] = set()
    for part in package.iter_parts():
        partname = str(part.partname)
        if not (partname.startswith(data_prefix) and partname.endswith(extension)):
            continue

        # Whatever sits between "item" and ".xml"; for an itemProps part this
        # is "Props<N>", which the digit check below rejects.
        digits = partname[len(data_prefix) : -len(extension)]

        # Require plain ASCII digits. `int()` on its own also accepts a sign,
        # surrounding whitespace and "_" separators, so names like "item+1.xml"
        # or "item 1.xml" would wrongly mark index 1 as taken.
        if not (digits.isascii() and digits.isdigit()):
            continue

        try:
            used.add(int(digits))
        except ValueError:
            # Stay tolerant of anything `int()` still refuses (e.g. an
            # over-long digit string); such a part is simply not counted.
            continue

    n = 1
    while n in used:
        n += 1
    return n

0 commit comments

Comments
 (0)