Skip to content

Commit 5c7c149

Browse files
Matthew HoroszowskiMatthew Horoszowski
authored andcommitted
fix(slides): unify master/layout id pool in append_from to clear PowerPoint repair
`Presentation.append_from` triggered PowerPoint's bad-content / repair dialog on first open of any multi-master output. Three distinct OOXML id collisions inherited from the shared default.pptx template needed scrubbing during cross-package master/layout deepcopy: 1. `<p:sldMasterId id>` and `<p:sldLayoutId id>` share one identifier pool per ECMA-376 sec 19.2.1.34 / 19.2.1.27, and PowerPoint enforces uniqueness across both. The new master2 was being allocated id 2147483649 (one above target's lone master at 2147483648) — but 2147483649 was already master1's first layout id. PowerPoint's repair output renumbered master2 to 2147483660. Root fix: build a unified pool from BOTH master and layout ids in the target, allocate master id first (consumes one slot), then renumber layout ids above it. End state matches PowerPoint's repair ordering (master2= 2147483660, master2's layouts=2147483661..2147483671). 2. `<p:sldLayoutId id>` collided across masters because deepcopy of the source master carried the source's layout ids verbatim, identical to target's existing master's layout ids when both came from the same default.pptx. Spec: unique within the layout collection; PowerPoint: unique across the presentation. 3. `<p14:creationId val>` was deterministic in default.pptx (master: 2209977519, plus 11 fixed layout vals), so two presentations from the template ended up with identical vals across their parallel masters and layouts. Refresh each val to a fresh secrets.randbits(32) on deepcopy to keep the extLst structure intact while breaking the collision. Diagnostic path: PowerPoint's repair dialog gave only the generic "PowerPoint found a problem with content" with no error-line detail. Iterated by having the maintainer save the repaired copy and diffing against the original; the rewritten `<p:sldMasterId id>` value (2147483649 → 2147483660) revealed the master/layout pool sharing. Surface: - src/pptx/parts/slide.py: new _used_master_layout_ids_in_target, _renumber_sldLayoutIds, _refresh_creation_ids; _add_sldMasterId_to _presentation accepts unified pool; _port_master and _port_layout _standalone apply scrubs at deepcopy points - tests/test_append_from.py: DescribePresentation_AppendFrom_OOXML Integrity class with 4 zip-level integrity tests (sldLayoutId uniqueness across masters, master id disjoint from layout id pool, master creationId uniqueness, layout creationId uniqueness) Refs #11. Tests: 3166 unit (+4 new) + 994 behave scenarios all passing. Lint: ruff check + ruff format clean.
1 parent adb869c commit 5c7c149

2 files changed

Lines changed: 233 additions & 10 deletions

File tree

src/pptx/parts/slide.py

Lines changed: 114 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import contextlib
66
import copy
77
import re
8+
import secrets
89
from io import BytesIO
910
from typing import IO, TYPE_CHECKING, cast
1011

@@ -293,6 +294,9 @@ def duplicate(self) -> SlidePart:
293294
# ---------------------------------------------------------------------------
294295

295296
_RELS_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
297+
_P_NS = "{http://schemas.openxmlformats.org/presentationml/2006/main}"
298+
_P14_NS = "{http://schemas.microsoft.com/office/powerpoint/2010/main}"
299+
_OOXML_LAYOUT_ID_FLOOR = 2147483648 # uint32 floor per ECMA-376 sec 19.2.1.27.
296300

297301
# Reltypes filtered out during slide duplication. NOTES_SLIDE is wired
298302
# explicitly by |Slides.duplicate| so the new notes-slide back-references
@@ -382,6 +386,13 @@ def __init__(self, target_pres_part: PresentationPart):
382386
self._master_map: dict[Part, Part] = {}
383387
self._layout_map: dict[Part, Part] = {}
384388
self._theme_map: dict[Part, Part] = {}
389+
# ---unified id pool: master ids and layout ids share one pool per
390+
# OOXML spec, and PowerPoint enforces uniqueness across both.
391+
# Both `_add_sldMasterId_to_presentation` and
392+
# `_renumber_sldLayoutIds` allocate from this set and update it
393+
# so sequential master ports don't collide with each other or
394+
# with target's existing layout ids.
395+
self._used_layout_ids = _used_master_layout_ids_in_target(target_pres_part)
385396

386397
# -- Public entry point ---------------------------------------------------
387398

@@ -512,6 +523,7 @@ def _port_layout_standalone(self, src_layout_part: SlideLayoutPart) -> SlideLayo
512523

513524
new_partname = self.target_package.next_partname("/ppt/slideLayouts/slideLayout%d.xml")
514525
new_element = copy.deepcopy(src_layout_part._element)
526+
_refresh_creation_ids(new_element)
515527
new_layout_part = SlideLayoutPart(
516528
new_partname, src_layout_part.content_type, self.target_package, new_element
517529
)
@@ -537,11 +549,18 @@ def _port_master(self, src_master_part: SlideMasterPart) -> SlideMasterPart:
537549
if src_master_part in self._master_map:
538550
return cast(SlideMasterPart, self._master_map[src_master_part])
539551

540-
# 1. Create new master part (deep-copy element)
552+
# 1. Create new master part (deep-copy element).
553+
# `<p14:creationId val>` is deterministic in default.pptx, so
554+
# deepcopied creationIds collide with target's; refresh now.
555+
# sldLayoutId renumbering is deferred to step 3 so layout ids
556+
# end up ABOVE the new master's id — PowerPoint's own repair
557+
# output matches this ordering and ECMA-376 sec 19.2.1.34 /
558+
# 19.2.1.27 confirm master and layout ids share one pool.
541559
new_master_partname = self.target_package.next_partname(
542560
"/ppt/slideMasters/slideMaster%d.xml"
543561
)
544562
new_element = copy.deepcopy(src_master_part._element)
563+
_refresh_creation_ids(new_element)
545564
new_master_part = SlideMasterPart(
546565
new_master_partname,
547566
src_master_part.content_type,
@@ -550,9 +569,16 @@ def _port_master(self, src_master_part: SlideMasterPart) -> SlideMasterPart:
550569
)
551570
# 2. Register in map + relate to presentation FIRST so iter_parts
552571
# reaches the master before any subsequent next_partname call.
572+
# Allocate the new master's id from the unified master/layout
573+
# pool — collision with an existing layout id flags repair.
553574
self._master_map[src_master_part] = new_master_part
554575
new_master_rId = self.target_pres_part.relate_to(new_master_part, RT.SLIDE_MASTER)
555-
_add_sldMasterId_to_presentation(self.target_pres_part, new_master_rId)
576+
_add_sldMasterId_to_presentation(
577+
self.target_pres_part, new_master_rId, self._used_layout_ids
578+
)
579+
# 3. Renumber the deepcopied sldLayoutIds NOW, after the master id
580+
# has consumed its slot in the pool, so layouts land above it.
581+
_renumber_sldLayoutIds(new_element, self._used_layout_ids)
556582

557583
# 3. For each source layout owned by this master: create + register
558584
# + relate to the master IMMEDIATELY (before next next_partname).
@@ -567,6 +593,7 @@ def _port_master(self, src_master_part: SlideMasterPart) -> SlideMasterPart:
567593
"/ppt/slideLayouts/slideLayout%d.xml"
568594
)
569595
new_layout_element = copy.deepcopy(src_layout._element)
596+
_refresh_creation_ids(new_layout_element)
570597
new_layout = SlideLayoutPart(
571598
new_layout_partname,
572599
src_layout.content_type,
@@ -761,25 +788,102 @@ def _duplicate_blob_part_into(src: Part, target_package) -> Part:
761788
return cls(new_partname, src.content_type, target_package, src.blob)
762789

763790

764-
def _add_sldMasterId_to_presentation(pres_part: PresentationPart, rId: str) -> None:
791+
def _used_master_layout_ids_in_target(pres_part: PresentationPart) -> set[int]:
792+
"""Return every `<p:sldMasterId>` AND `<p:sldLayoutId>` `id` in target.
793+
794+
PowerPoint enforces uniqueness across the SHARED master/layout id pool
795+
(per ECMA-376 sec 19.2.1.34 sldMasterId / sec 19.2.1.27 sldLayoutId,
796+
both consume the same conceptual identifier space). A new master id
797+
that collides with an existing layout id flags the file for repair on
798+
load.
799+
"""
800+
used: set[int] = set()
801+
sldMasterIdLst = pres_part._element.find(f"{_P_NS}sldMasterIdLst")
802+
if sldMasterIdLst is not None:
803+
for smi in sldMasterIdLst.findall(f"{_P_NS}sldMasterId"):
804+
raw = smi.get("id")
805+
if raw is None:
806+
continue
807+
with contextlib.suppress(ValueError):
808+
used.add(int(raw))
809+
for rel in pres_part.rels.values():
810+
if rel.is_external or rel.reltype != RT.SLIDE_MASTER:
811+
continue
812+
master_element = rel.target_part._element
813+
sldLayoutIdLst = master_element.find(f"{_P_NS}sldLayoutIdLst")
814+
if sldLayoutIdLst is None:
815+
continue
816+
for sli in sldLayoutIdLst.findall(f"{_P_NS}sldLayoutId"):
817+
raw = sli.get("id")
818+
if raw is None:
819+
continue
820+
with contextlib.suppress(ValueError):
821+
used.add(int(raw))
822+
return used
823+
824+
825+
def _renumber_sldLayoutIds(element, used_ids: set[int]) -> None:
826+
"""Reassign `<p:sldLayoutId>` `id` attributes on a deepcopied master.
827+
828+
Mutates `used_ids` in place so back-to-back ports of multiple masters
829+
keep allocating unique ids relative to one another, not just relative
830+
to target's pre-existing masters.
831+
"""
832+
sldLayoutIdLst = element.find(f"{_P_NS}sldLayoutIdLst")
833+
if sldLayoutIdLst is None:
834+
return
835+
next_id = max(used_ids | {_OOXML_LAYOUT_ID_FLOOR - 1}) + 1
836+
for sli in sldLayoutIdLst.findall(f"{_P_NS}sldLayoutId"):
837+
sli.set("id", str(next_id))
838+
used_ids.add(next_id)
839+
next_id += 1
840+
841+
842+
def _refresh_creation_ids(element) -> None:
843+
"""Reassign every `<p14:creationId val>` to a fresh uint32 value.
844+
845+
The default `default.pptx` ships with deterministic vals on each
846+
master / layout `<p14:creationId>`, so two presentations built from
847+
it carry identical vals. PowerPoint flags those as bad content.
848+
Replacing the val (rather than stripping the element) keeps the
849+
extLst structure PowerPoint expects intact.
850+
"""
851+
creation_id_tag = f"{_P14_NS}creationId"
852+
for node in element.iter(creation_id_tag):
853+
# 32-bit unsigned, cryptographically strong enough for collision avoidance.
854+
node.set("val", str(secrets.randbits(32)))
855+
856+
857+
def _add_sldMasterId_to_presentation(
858+
pres_part: PresentationPart, rId: str, used_ids: set[int] | None = None
859+
) -> None:
765860
"""Append a `<p:sldMasterId>` to the presentation, allocating an id.
766861
767862
Slide-master ids in OOXML are required uint32 values typically in the
768863
high range (default first master is `2147483648`). python-pptx's
769864
`CT_SlideMasterIdListEntry` only declares the `rId` attribute, so we
770865
set / read `id` directly on the lxml element to round-trip cleanly.
866+
867+
`used_ids`, when supplied, is the unified master/layout id pool. The
868+
new id is taken above its current max and added to it; PowerPoint
869+
enforces master/layout id uniqueness across this shared pool, and a
870+
collision flags the file for repair on load. When `used_ids` is None,
871+
only existing sldMasterId values are scanned (back-compat for any
872+
out-of-band caller).
771873
"""
772874
sldMasterIdLst = pres_part._element.get_or_add_sldMasterIdLst()
773-
used_ids: list[int] = []
774-
for sm in sldMasterIdLst.sldMasterId_lst:
775-
raw = sm.get("id")
776-
if raw is not None:
777-
with contextlib.suppress(ValueError):
778-
used_ids.append(int(raw))
779-
next_id = max(used_ids + [2147483647]) + 1
875+
if used_ids is None:
876+
used_ids = set()
877+
for sm in sldMasterIdLst.sldMasterId_lst:
878+
raw = sm.get("id")
879+
if raw is not None:
880+
with contextlib.suppress(ValueError):
881+
used_ids.add(int(raw))
882+
next_id = max(used_ids | {_OOXML_LAYOUT_ID_FLOOR - 1}) + 1
780883
sldMasterId = sldMasterIdLst._add_sldMasterId()
781884
sldMasterId.rId = rId
782885
sldMasterId.set("id", str(next_id))
886+
used_ids.add(next_id)
783887

784888

785889
def _add_sldLayoutId_to_master(master_part, rId: str) -> None:

tests/test_append_from.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,3 +527,122 @@ def it_does_not_carry_comments_through_an_append(self):
527527

528528
with pytest.raises(KeyError):
529529
new_slide.part.part_related_by(RT.COMMENTS)
530+
531+
532+
# ---------------------------------------------------------------------------
533+
# PowerPoint round-trip integrity (uniqueness of OOXML-spec ID attrs).
534+
# ---------------------------------------------------------------------------
535+
536+
537+
class DescribePresentation_AppendFrom_OOXMLIntegrity(object):
538+
"""PowerPoint flags 'bad content / repair' on collisions across masters.
539+
540+
Two distinct collision classes appear when porting a master from a
541+
source presentation that was built from the same default template as
542+
the target:
543+
544+
1. ``<p:sldLayoutId id="...">`` — the deepcopy carries source's layout
545+
ids verbatim, which match target's already-existing master's ids.
546+
2. ``<p14:creationId val="...">`` — the default ``default.pptx`` ships
547+
with a deterministic creationId on every master and layout, so
548+
both masters and every parallel layout end up with identical vals.
549+
550+
Both surface only after a save -> reopen, hence the zip-level checks.
551+
"""
552+
553+
def _save_to_zip(self, prs):
554+
import zipfile
555+
556+
buf = io.BytesIO()
557+
prs.save(buf)
558+
buf.seek(0)
559+
return zipfile.ZipFile(buf)
560+
561+
def it_assigns_unique_sldLayoutId_values_across_masters(self):
562+
import re
563+
564+
src = _seed_presentation_with(1)
565+
tgt = Presentation()
566+
567+
tgt.append_from(src)
568+
569+
with self._save_to_zip(tgt) as zf:
570+
all_ids: list[str] = []
571+
for n in zf.namelist():
572+
if not (n.startswith("ppt/slideMasters/slideMaster") and n.endswith(".xml")):
573+
continue
574+
xml = zf.read(n).decode()
575+
all_ids.extend(re.findall(r'<p:sldLayoutId id="(\d+)"', xml))
576+
577+
assert len(all_ids) == len(set(all_ids)), (
578+
"duplicate sldLayoutId across masters: %r" % all_ids
579+
)
580+
581+
def it_assigns_master_id_outside_existing_layout_id_pool(self):
582+
"""sldMasterId and sldLayoutId share one id pool per ECMA-376.
583+
584+
Regression for the PowerPoint-repair trigger where the new master's
585+
id collided with target's existing master's first layout id (both
586+
landed at 2147483649 because the master-id allocator only consulted
587+
master ids). PowerPoint flagged the file as bad content.
588+
"""
589+
import re
590+
591+
src = _seed_presentation_with(1)
592+
tgt = Presentation()
593+
594+
tgt.append_from(src)
595+
596+
with self._save_to_zip(tgt) as zf:
597+
pres_xml = zf.read("ppt/presentation.xml").decode()
598+
master_ids = set(re.findall(r'<p:sldMasterId[^/]*\bid="(\d+)"', pres_xml))
599+
layout_ids: set[str] = set()
600+
for n in zf.namelist():
601+
if not (n.startswith("ppt/slideMasters/slideMaster") and n.endswith(".xml")):
602+
continue
603+
xml = zf.read(n).decode()
604+
layout_ids.update(re.findall(r'<p:sldLayoutId id="(\d+)"', xml))
605+
606+
assert master_ids.isdisjoint(layout_ids), (
607+
"master id collides with a layout id: master=%r layouts=%r" % (master_ids, layout_ids)
608+
)
609+
610+
def it_does_not_duplicate_p14_creationId_across_masters(self):
611+
import re
612+
613+
src = _seed_presentation_with(1)
614+
tgt = Presentation()
615+
616+
tgt.append_from(src)
617+
618+
with self._save_to_zip(tgt) as zf:
619+
master_creation_ids: list[str] = []
620+
for n in zf.namelist():
621+
if not (n.startswith("ppt/slideMasters/slideMaster") and n.endswith(".xml")):
622+
continue
623+
xml = zf.read(n).decode()
624+
master_creation_ids.extend(re.findall(r'creationId[^>]*val="([^"]+)"', xml))
625+
626+
assert len(master_creation_ids) == len(set(master_creation_ids)), (
627+
"duplicate master creationId: %r" % master_creation_ids
628+
)
629+
630+
def it_does_not_duplicate_p14_creationId_across_layouts(self):
631+
import re
632+
633+
src = _seed_presentation_with(1)
634+
tgt = Presentation()
635+
636+
tgt.append_from(src)
637+
638+
with self._save_to_zip(tgt) as zf:
639+
layout_creation_ids: list[str] = []
640+
for n in zf.namelist():
641+
if not (n.startswith("ppt/slideLayouts/slideLayout") and n.endswith(".xml")):
642+
continue
643+
xml = zf.read(n).decode()
644+
layout_creation_ids.extend(re.findall(r'creationId[^>]*val="([^"]+)"', xml))
645+
646+
assert len(layout_creation_ids) == len(set(layout_creation_ids)), (
647+
"duplicate layout creationId: %r" % layout_creation_ids
648+
)

0 commit comments

Comments
 (0)