diff --git a/features/append-from.feature b/features/append-from.feature new file mode 100644 index 000000000..cfbae526c --- /dev/null +++ b/features/append-from.feature @@ -0,0 +1,29 @@ +Feature: Presentation.append_from — cross-deck slide copy + In order to assemble a deck from slides scattered across multiple .pptx files + As a developer using python-pptx + I need a single API call that copies slides between two presentations, + porting the slide-master / layout / theme and deduping images at the target + + + Scenario: Presentation.append_from(other) appends every source slide + Given two seeded presentations source and target + When I call target.append_from(source) + Then target's slide count grew by source's slide count + And target's master count grew by 1 + + + Scenario: Presentation.append_from(other, slide_indexes) appends selected slides + Given two seeded presentations source and target + When I call target.append_from(source, slide_indexes=[0, 1]) + Then target's slide count grew by 2 + + + Scenario: append_from with empty slide_indexes is a no-op + Given two seeded presentations source and target + When I call target.append_from(source, slide_indexes=[]) + Then target's slide count is unchanged + + + Scenario: append_from raises IndexError for out-of-range index + Given two seeded presentations source and target + Then calling target.append_from(source, slide_indexes=[99]) raises IndexError diff --git a/features/steps/slides.py b/features/steps/slides.py index f2d6206ec..55fc1850a 100644 --- a/features/steps/slides.py +++ b/features/steps/slides.py @@ -236,3 +236,77 @@ def then_duplicate_index_99_raises(context): with pytest.raises(IndexError): context.slides.duplicate(context.slides[0], index=99) + + +# -- append_from steps ----------------------------------------------------- + + +@given("two seeded presentations source and target") +def given_two_seeded_presentations(context): + from pptx.util import Inches + + src = Presentation() + layout = src.slide_layouts[6] + for i in range(3): + s = src.slides.add_slide(layout) + s.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(1)).text_frame.text = ( + "src %d" % i + ) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + context.source_pres = src + context.target_pres = tgt + context.target_slides_before = len(tgt.slides) + context.target_masters_before = sum(1 for _ in tgt.slide_masters) + + +@when("I call target.append_from(source)") +def when_target_append_from_source_all(context): + context.new_slides = context.target_pres.append_from(context.source_pres) + + +@when("I call target.append_from(source, slide_indexes=[0, 1])") +def when_target_append_from_source_indexes_0_1(context): + context.new_slides = context.target_pres.append_from( + context.source_pres, slide_indexes=[0, 1] + ) + + +@when("I call target.append_from(source, slide_indexes=[])") +def when_target_append_from_source_indexes_empty(context): + context.new_slides = context.target_pres.append_from(context.source_pres, slide_indexes=[]) + + +@then("target's slide count grew by source's slide count") +def then_target_grew_by_source_slide_count(context): + expected = context.target_slides_before + len(context.source_pres.slides) + actual = len(context.target_pres.slides) + assert actual == expected, "expected %d slides, got %d" % (expected, actual) + + +@then("target's master count grew by 1") +def then_target_master_count_grew_by_1(context): + actual = sum(1 for _ in context.target_pres.slide_masters) + assert actual == context.target_masters_before + 1, ( + "expected %d masters, got %d" % (context.target_masters_before + 1, actual) + ) + + +@then("target's slide count grew by 2") +def then_target_slide_count_grew_by_2(context): + actual = len(context.target_pres.slides) + assert actual == context.target_slides_before + 2 + + +@then("target's slide count is unchanged") +def then_target_slide_count_unchanged(context): + actual = len(context.target_pres.slides) + assert actual == context.target_slides_before + + +@then("calling target.append_from(source, slide_indexes=[99]) raises IndexError") +def then_append_from_index_99_raises(context): + import pytest + + with pytest.raises(IndexError): + context.target_pres.append_from(context.source_pres, slide_indexes=[99]) diff --git a/src/pptx/parts/slide.py b/src/pptx/parts/slide.py index 96b70d309..7311e516f 100644 --- a/src/pptx/parts/slide.py +++ b/src/pptx/parts/slide.py @@ -2,8 +2,11 @@ from __future__ import annotations +import contextlib import copy import re +import secrets +from io import BytesIO from typing import IO, TYPE_CHECKING, cast from pptx.enum.shapes import PROG_ID @@ -15,9 +18,13 @@ from pptx.oxml.theme import CT_OfficeStyleSheet from pptx.parts.chart import ChartPart from pptx.parts.embeddedpackage import EmbeddedPackagePart +from pptx.parts.image import Image, ImagePart from pptx.slide import NotesMaster, NotesSlide, Slide, SlideLayout, SlideMaster from pptx.util import lazyproperty +if TYPE_CHECKING: + from pptx.parts.presentation import PresentationPart + if TYPE_CHECKING: from pptx.chart.data import ChartData from pptx.enum.chart import XL_CHART_TYPE @@ -287,6 +294,9 @@ def duplicate(self) -> SlidePart: # --------------------------------------------------------------------------- _RELS_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}" +_P_NS = "{http://schemas.openxmlformats.org/presentationml/2006/main}" +_P14_NS = "{http://schemas.microsoft.com/office/powerpoint/2010/main}" +_OOXML_LAYOUT_ID_FLOOR = 2147483648 # uint32 floor per ECMA-376 sec 19.2.1.27. # Reltypes filtered out during slide duplication. NOTES_SLIDE is wired # explicitly by |Slides.duplicate| so the new notes-slide back-references @@ -333,6 +343,569 @@ def _remap_rId_attrs(element, rId_map: dict[str, str]) -> None: el.attrib[attr_name] = rId_map[old] +# --------------------------------------------------------------------------- +# Cross-package porting (Phase 3 of issue #11 — `Presentation.append_from`). +# +# Phase 2 (`Slide.duplicate`) cloned within a SINGLE package — all parts +# referenced by the source slide already lived in `target_package`, so the +# relate_to() calls naturally deduped at the rel level. Phase 3 ports parts +# BETWEEN two packages, so the dedup machinery has to come from us. +# +# Strategy: a single `_PortContext` per `append_from` call carries a set of +# {source_part: target_part} maps for slide-master, slide-layout, and theme. +# Image/media dedup at target uses the package's existing SHA1-based +# `get_or_add_image_part` / `_media_parts` machinery — we just hand it the +# foreign blob via BytesIO. The recursion master→layouts→master is broken +# by registering the new master in the map BEFORE walking its layouts. +# --------------------------------------------------------------------------- + + +# Reltypes that — when encountered on a master, layout, or theme — are +# private to that part and must be ported (deep-copy + new partname). The +# notes-slide / comments handling sticks with Phase 2's drop-list at the +# slide level; layout/master have neither. +_LAYOUT_OR_MASTER_PRIVATE_PARTNAMES = { + # parts addressable by partname suffix; we only need this at the + # `_port_blob_xml_part` fallback for parts whose class doesn't ship a + # `partname_template` attribute. +} + + +class _PortContext: + """Per-`append_from`-call cache + helpers for cross-package porting. + + Holds the `{source_part: target_part}` maps that ensure source + slide-masters / layouts / themes shared across multiple appended source + slides land as a single target part. Constructed once per call to + `Presentation.append_from`; not state-of-the-Presentation. + """ + + def __init__(self, target_pres_part: PresentationPart): + self.target_pres_part = target_pres_part + self.target_package = target_pres_part.package + self._master_map: dict[Part, Part] = {} + self._layout_map: dict[Part, Part] = {} + self._theme_map: dict[Part, Part] = {} + # ---unified id pool: master ids and layout ids share one pool per + # OOXML spec, and PowerPoint enforces uniqueness across both. + # Both `_add_sldMasterId_to_presentation` and + # `_renumber_sldLayoutIds` allocate from this set and update it + # so sequential master ports don't collide with each other or + # with target's existing layout ids. + self._used_layout_ids = _used_master_layout_ids_in_target(target_pres_part) + + # -- Public entry point --------------------------------------------------- + + def port_slide(self, src_slide_part: SlidePart) -> SlidePart: + """Port a single source slide into the target package. + + Returns the new |SlidePart| in target. Caller is responsible for + registering the new slide with `target.part` (rId + sldId). + """ + target_layout_part = self._port_layout( + cast(SlideLayoutPart, src_slide_part.part_related_by(RT.SLIDE_LAYOUT)) + ) + + new_partname = self.target_package.next_partname("/ppt/slides/slide%d.xml") + new_element = copy.deepcopy(src_slide_part._element) + new_slide_part = SlidePart(new_partname, CT.PML_SLIDE, self.target_package, new_element) + + rId_map = self._replicate_slide_rels(src_slide_part, new_slide_part, target_layout_part) + _remap_rId_attrs(new_element, rId_map) + + return new_slide_part + + def port_notes_slide( + self, src_slide_part: SlidePart, new_slide_part: SlidePart + ) -> NotesSlidePart: + """Build a fresh notes-slide for `new_slide_part` from src's notes. + + Mirrors Phase 2's `duplicate_notes_slide_for` but routes through + the target package and uses the target's existing notes-master. + Cross-package gotcha #961: the notes-slide's `RT.SLIDE` back-rel + is rewired to the new slide; the `RT.NOTES_MASTER` rel points at + target's existing notes-master (NOT a port of source's). + """ + src_notes_part = cast(NotesSlidePart, src_slide_part.part_related_by(RT.NOTES_SLIDE)) + new_partname = self.target_package.next_partname("/ppt/notesSlides/notesSlide%d.xml") + new_element = copy.deepcopy(src_notes_part._element) + new_notes_part = NotesSlidePart( + new_partname, CT.PML_NOTES_SLIDE, self.target_package, new_element + ) + + target_notes_master_part = self.target_pres_part.notes_master_part + + rId_map: dict[str, str] = {} + for rId, rel in src_notes_part.rels.items(): + if rel.is_external: + new_rId = new_notes_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.SLIDE: + new_rId = new_notes_part.relate_to(new_slide_part, RT.SLIDE) + elif rel.reltype == RT.NOTES_MASTER: + # ---target's existing notes-master, not ported from source--- + new_rId = new_notes_part.relate_to(target_notes_master_part, RT.NOTES_MASTER) + elif rel.reltype == RT.IMAGE: + new_target = self._port_image_part(cast(ImagePart, rel.target_part)) + new_rId = new_notes_part.relate_to(new_target, RT.IMAGE) + else: + # ---fall back to a deep-copy port for anything unexpected--- + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_rId = new_notes_part.relate_to(new_target, rel.reltype) + rId_map[rId] = new_rId + _remap_rId_attrs(new_element, rId_map) + + new_slide_part.relate_to(new_notes_part, RT.NOTES_SLIDE) + return new_notes_part + + # -- Per-reltype porting -------------------------------------------------- + + def _replicate_slide_rels( + self, + src_slide_part: SlidePart, + new_slide_part: SlidePart, + target_layout_part: SlideLayoutPart, + ) -> dict[str, str]: + """Mirror src slide's rels onto new slide in target package.""" + rId_map: dict[str, str] = {} + for rId, rel in src_slide_part.rels.items(): + if rel.reltype in _DUP_DROP_RELTYPES_SLIDE: + continue + if rel.is_external: + new_rId = new_slide_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.SLIDE_LAYOUT: + # ---use the already-ported (or shared) target layout--- + new_rId = new_slide_part.relate_to(target_layout_part, RT.SLIDE_LAYOUT) + elif rel.reltype == RT.IMAGE: + new_target = self._port_image_part(cast(ImagePart, rel.target_part)) + new_rId = new_slide_part.relate_to(new_target, RT.IMAGE) + elif rel.reltype in (RT.MEDIA, RT.VIDEO, RT.AUDIO): + new_target = self._port_image_part_like(cast(Part, rel.target_part)) + new_rId = new_slide_part.relate_to(new_target, rel.reltype) + elif rel.reltype == RT.CHART: + new_target = _duplicate_chart_part_into( + cast(ChartPart, rel.target_part), self.target_package + ) + new_rId = new_slide_part.relate_to(new_target, rel.reltype) + elif rel.reltype in (RT.OLE_OBJECT, RT.PACKAGE): + new_target = _duplicate_blob_part_into( + cast(Part, rel.target_part), self.target_package + ) + new_rId = new_slide_part.relate_to(new_target, rel.reltype) + else: + # ---unknown reltype: fall back to deep-copy port to be safe--- + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_rId = new_slide_part.relate_to(new_target, rel.reltype) + rId_map[rId] = new_rId + return rId_map + + def _port_layout(self, src_layout_part: SlideLayoutPart) -> SlideLayoutPart: + """Port a slide-layout into target package (with master + theme).""" + if src_layout_part in self._layout_map: + return cast(SlideLayoutPart, self._layout_map[src_layout_part]) + + # ---porting the master will fill self._layout_map for ALL of master's + # layouts (master owns its layout tree); after that returns, our + # target layout is in the map--- + src_master_part = cast(SlideMasterPart, src_layout_part.part_related_by(RT.SLIDE_MASTER)) + self._port_master(src_master_part) + + # ---if porting the master populated the map, we're done--- + if src_layout_part in self._layout_map: + return cast(SlideLayoutPart, self._layout_map[src_layout_part]) + + # ---defensive fallback: master had no rel to this layout, port standalone--- + return self._port_layout_standalone(src_layout_part) + + def _port_layout_standalone(self, src_layout_part: SlideLayoutPart) -> SlideLayoutPart: + """Port a layout whose master rel didn't auto-discover it.""" + src_master_part = cast(SlideMasterPart, src_layout_part.part_related_by(RT.SLIDE_MASTER)) + target_master_part = self._port_master(src_master_part) + + new_partname = self.target_package.next_partname("/ppt/slideLayouts/slideLayout%d.xml") + new_element = copy.deepcopy(src_layout_part._element) + _refresh_creation_ids(new_element) + new_layout_part = SlideLayoutPart( + new_partname, src_layout_part.content_type, self.target_package, new_element + ) + self._layout_map[src_layout_part] = new_layout_part + + rId_map = self._replicate_layout_rels(src_layout_part, new_layout_part, target_master_part) + _remap_rId_attrs(new_element, rId_map) + + new_master_layout_rId = target_master_part.relate_to(new_layout_part, RT.SLIDE_LAYOUT) + # ---also append to target master's --- + _add_sldLayoutId_to_master(target_master_part, new_master_layout_rId) + return new_layout_part + + def _port_master(self, src_master_part: SlideMasterPart) -> SlideMasterPart: + """Port a slide-master into target package, including all its layouts. + + Order matters here: each new part must be related into the package's + rel graph before the next call to `next_partname`, otherwise the + unrelated part is invisible to `iter_parts` and `next_partname` + hands out the same partname twice (the zipfile then gets duplicate + entries and PowerPoint rejects the file). + """ + if src_master_part in self._master_map: + return cast(SlideMasterPart, self._master_map[src_master_part]) + + # 1. Create new master part (deep-copy element). + # `` is deterministic in default.pptx, so + # deepcopied creationIds collide with target's; refresh now. + # sldLayoutId renumbering is deferred to step 3 so layout ids + # end up ABOVE the new master's id — PowerPoint's own repair + # output matches this ordering and ECMA-376 sec 19.2.1.34 / + # 19.2.1.27 confirm master and layout ids share one pool. + new_master_partname = self.target_package.next_partname( + "/ppt/slideMasters/slideMaster%d.xml" + ) + new_element = copy.deepcopy(src_master_part._element) + _refresh_creation_ids(new_element) + new_master_part = SlideMasterPart( + new_master_partname, + src_master_part.content_type, + self.target_package, + new_element, + ) + # 2. Register in map + relate to presentation FIRST so iter_parts + # reaches the master before any subsequent next_partname call. + # Allocate the new master's id from the unified master/layout + # pool — collision with an existing layout id flags repair. + self._master_map[src_master_part] = new_master_part + new_master_rId = self.target_pres_part.relate_to(new_master_part, RT.SLIDE_MASTER) + _add_sldMasterId_to_presentation( + self.target_pres_part, new_master_rId, self._used_layout_ids + ) + # 3. Renumber the deepcopied sldLayoutIds NOW, after the master id + # has consumed its slot in the pool, so layouts land above it. + _renumber_sldLayoutIds(new_element, self._used_layout_ids) + + # 3. For each source layout owned by this master: create + register + # + relate to the master IMMEDIATELY (before next next_partname). + src_layouts = [ + cast(SlideLayoutPart, rel.target_part) + for rel in src_master_part.rels.values() + if not rel.is_external and rel.reltype == RT.SLIDE_LAYOUT + ] + new_layouts: list[tuple[SlideLayoutPart, SlideLayoutPart]] = [] + for src_layout in src_layouts: + new_layout_partname = self.target_package.next_partname( + "/ppt/slideLayouts/slideLayout%d.xml" + ) + new_layout_element = copy.deepcopy(src_layout._element) + _refresh_creation_ids(new_layout_element) + new_layout = SlideLayoutPart( + new_layout_partname, + src_layout.content_type, + self.target_package, + new_layout_element, + ) + self._layout_map[src_layout] = new_layout + # Relate IMMEDIATELY so the next next_partname sees this layout. + new_master_part.relate_to(new_layout, RT.SLIDE_LAYOUT) + new_layouts.append((src_layout, new_layout)) + + # 4. Walk master's rels. SLIDE_LAYOUT rels were created in step 3 + # so they are reused (not re-created) by `_replicate_master_rels`. + master_rId_map = self._replicate_master_rels(src_master_part, new_master_part) + _remap_rId_attrs(new_element, master_rId_map) + + # 5. Walk each layout's rels (master is in the map). + for src_layout, new_layout in new_layouts: + layout_rId_map = self._replicate_layout_rels(src_layout, new_layout, new_master_part) + _remap_rId_attrs(new_layout._element, layout_rId_map) + + return new_master_part + + def _replicate_master_rels( + self, src_master_part: SlideMasterPart, new_master_part: SlideMasterPart + ) -> dict[str, str]: + """Mirror master's rels onto new master in target package.""" + rId_map: dict[str, str] = {} + for rId, rel in src_master_part.rels.items(): + if rel.is_external: + new_rId = new_master_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.SLIDE_LAYOUT: + # ---layout was pre-created and is in self._layout_map--- + new_rId = new_master_part.relate_to( + self._layout_map[cast(Part, rel.target_part)], RT.SLIDE_LAYOUT + ) + elif rel.reltype == RT.THEME: + new_target = self._port_theme(cast(XmlPart, rel.target_part)) + new_rId = new_master_part.relate_to(new_target, RT.THEME) + elif rel.reltype == RT.IMAGE: + new_target = self._port_image_part(cast(ImagePart, rel.target_part)) + new_rId = new_master_part.relate_to(new_target, RT.IMAGE) + else: + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_rId = new_master_part.relate_to(new_target, rel.reltype) + rId_map[rId] = new_rId + return rId_map + + def _replicate_layout_rels( + self, + src_layout_part: SlideLayoutPart, + new_layout_part: SlideLayoutPart, + target_master_part: SlideMasterPart, + ) -> dict[str, str]: + """Mirror layout's rels onto new layout in target package.""" + rId_map: dict[str, str] = {} + for rId, rel in src_layout_part.rels.items(): + if rel.is_external: + new_rId = new_layout_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.SLIDE_MASTER: + new_rId = new_layout_part.relate_to(target_master_part, RT.SLIDE_MASTER) + elif rel.reltype == RT.IMAGE: + new_target = self._port_image_part(cast(ImagePart, rel.target_part)) + new_rId = new_layout_part.relate_to(new_target, RT.IMAGE) + else: + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_rId = new_layout_part.relate_to(new_target, rel.reltype) + rId_map[rId] = new_rId + return rId_map + + def _port_theme(self, src_theme_part: Part) -> Part: + """Port a theme part into target package (within-call dedup). + + Theme parts may load as `XmlPart` (created in-memory) or as the + base `Part` (binary blob, when loaded from a `.pptx` on disk and + the content-type isn't registered to a specific XML class). Handle + both cases by branching on element-presence. + """ + if src_theme_part in self._theme_map: + return self._theme_map[src_theme_part] + + new_partname = self.target_package.next_partname("/ppt/theme/theme%d.xml") + cls = type(src_theme_part) + new_part: Part + + if isinstance(src_theme_part, XmlPart): + new_element = copy.deepcopy(src_theme_part._element) + new_part = cls( + new_partname, src_theme_part.content_type, self.target_package, new_element + ) + self._theme_map[src_theme_part] = new_part + rId_map: dict[str, str] = {} + for rId, rel in src_theme_part.rels.items(): + if rel.is_external: + new_rId = new_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.IMAGE: + new_target = self._port_image_part(cast(ImagePart, rel.target_part)) + new_rId = new_part.relate_to(new_target, RT.IMAGE) + else: + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_rId = new_part.relate_to(new_target, rel.reltype) + rId_map[rId] = new_rId + _remap_rId_attrs(new_element, rId_map) + else: + # Theme loaded as binary Part — blob copy. No rels typically. + new_part = cls( + new_partname, src_theme_part.content_type, self.target_package, src_theme_part.blob + ) + self._theme_map[src_theme_part] = new_part + for rId, rel in src_theme_part.rels.items(): + if rel.is_external: + new_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.IMAGE: + new_target = self._port_image_part(cast(ImagePart, rel.target_part)) + new_part.relate_to(new_target, RT.IMAGE) + else: + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_part.relate_to(new_target, rel.reltype) + return new_part + + def _port_image_part(self, src_image_part: ImagePart) -> ImagePart: + """Port an image into target package, deduping by SHA1. + + Routes through `Package.get_or_add_image_part` so an image already + present in target (by SHA1 of bytes) is reused rather than duplicated. + """ + return self.target_package.get_or_add_image_part(BytesIO(src_image_part.blob)) + + def _port_image_part_like(self, src_part: Part) -> Part: + """Port a media-grade binary part (audio/video). Naive blob copy. + + python-pptx's `_MediaParts` dedupes by SHA1 only when constructed + via `add_media_part(video)`, which we don't have here. Doing a + plain blob copy is safe; downstream merging is a follow-up. + """ + return _duplicate_blob_part_into(src_part, self.target_package) + + def _port_unknown_part(self, src_part: Part) -> Part: + """Last-resort port for unknown reltypes — deep-copy if XML, else blob. + + Practical safety net for OOXML extensions we don't enumerate + explicitly (font tables, vmlDrawings, etc.). XML parts get a + deepcopy element + rId remap; binary parts get a blob copy. + """ + if isinstance(src_part, XmlPart): + new_partname = self.target_package.next_partname( + _derive_partname_template(str(src_part.partname)) + ) + new_element = copy.deepcopy(src_part._element) + cls = type(src_part) + new_part = cls(new_partname, src_part.content_type, self.target_package, new_element) + rId_map: dict[str, str] = {} + for rId, rel in src_part.rels.items(): + if rel.is_external: + new_rId = new_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + else: + new_target = self._port_unknown_part(cast(Part, rel.target_part)) + new_rId = new_part.relate_to(new_target, rel.reltype) + rId_map[rId] = new_rId + _remap_rId_attrs(new_element, rId_map) + return new_part + return _duplicate_blob_part_into(src_part, self.target_package) + + +def _duplicate_chart_part_into(src: ChartPart, target_package) -> ChartPart: + """Cross-package variant of `_duplicate_chart_part`.""" + new_partname = target_package.next_partname("/ppt/charts/chart%d.xml") + new_element = copy.deepcopy(src._element) + cls = type(src) + new_part = cls(new_partname, src.content_type, target_package, new_element) + rId_map: dict[str, str] = {} + for rId, rel in src.rels.items(): + if rel.is_external: + new_rId = new_part.relate_to(rel.target_ref, rel.reltype, is_external=True) + elif rel.reltype == RT.PACKAGE: + new_target = _duplicate_blob_part_into(cast(Part, rel.target_part), target_package) + new_rId = new_part.relate_to(new_target, rel.reltype) + else: + new_rId = new_part.relate_to(rel.target_part, rel.reltype) + rId_map[rId] = new_rId + _remap_rId_attrs(new_element, rId_map) + return new_part + + +def _duplicate_blob_part_into(src: Part, target_package) -> Part: + """Cross-package variant of `_duplicate_blob_part`.""" + cls = type(src) + tmpl = getattr(cls, "partname_template", None) + if tmpl is None: + tmpl = _derive_partname_template(str(src.partname)) + new_partname = target_package.next_partname(tmpl) + return cls(new_partname, src.content_type, target_package, src.blob) + + +def _used_master_layout_ids_in_target(pres_part: PresentationPart) -> set[int]: + """Return every `` AND `` `id` in target. + + PowerPoint enforces uniqueness across the SHARED master/layout id pool + (per ECMA-376 sec 19.2.1.34 sldMasterId / sec 19.2.1.27 sldLayoutId, + both consume the same conceptual identifier space). A new master id + that collides with an existing layout id flags the file for repair on + load. + """ + used: set[int] = set() + sldMasterIdLst = pres_part._element.find(f"{_P_NS}sldMasterIdLst") + if sldMasterIdLst is not None: + for smi in sldMasterIdLst.findall(f"{_P_NS}sldMasterId"): + raw = smi.get("id") + if raw is None: + continue + with contextlib.suppress(ValueError): + used.add(int(raw)) + for rel in pres_part.rels.values(): + if rel.is_external or rel.reltype != RT.SLIDE_MASTER: + continue + master_element = rel.target_part._element + sldLayoutIdLst = master_element.find(f"{_P_NS}sldLayoutIdLst") + if sldLayoutIdLst is None: + continue + for sli in sldLayoutIdLst.findall(f"{_P_NS}sldLayoutId"): + raw = sli.get("id") + if raw is None: + continue + with contextlib.suppress(ValueError): + used.add(int(raw)) + return used + + +def _renumber_sldLayoutIds(element, used_ids: set[int]) -> None: + """Reassign `` `id` attributes on a deepcopied master. + + Mutates `used_ids` in place so back-to-back ports of multiple masters + keep allocating unique ids relative to one another, not just relative + to target's pre-existing masters. + """ + sldLayoutIdLst = element.find(f"{_P_NS}sldLayoutIdLst") + if sldLayoutIdLst is None: + return + next_id = max(used_ids | {_OOXML_LAYOUT_ID_FLOOR - 1}) + 1 + for sli in sldLayoutIdLst.findall(f"{_P_NS}sldLayoutId"): + sli.set("id", str(next_id)) + used_ids.add(next_id) + next_id += 1 + + +def _refresh_creation_ids(element) -> None: + """Reassign every `` to a fresh uint32 value. + + The default `default.pptx` ships with deterministic vals on each + master / layout ``, so two presentations built from + it carry identical vals. PowerPoint flags those as bad content. + Replacing the val (rather than stripping the element) keeps the + extLst structure PowerPoint expects intact. + """ + creation_id_tag = f"{_P14_NS}creationId" + for node in element.iter(creation_id_tag): + # 32-bit unsigned, cryptographically strong enough for collision avoidance. + node.set("val", str(secrets.randbits(32))) + + +def _add_sldMasterId_to_presentation( + pres_part: PresentationPart, rId: str, used_ids: set[int] | None = None +) -> None: + """Append a `` to the presentation, allocating an id. + + Slide-master ids in OOXML are required uint32 values typically in the + high range (default first master is `2147483648`). python-pptx's + `CT_SlideMasterIdListEntry` only declares the `rId` attribute, so we + set / read `id` directly on the lxml element to round-trip cleanly. + + `used_ids`, when supplied, is the unified master/layout id pool. The + new id is taken above its current max and added to it; PowerPoint + enforces master/layout id uniqueness across this shared pool, and a + collision flags the file for repair on load. When `used_ids` is None, + only existing sldMasterId values are scanned (back-compat for any + out-of-band caller). + """ + sldMasterIdLst = pres_part._element.get_or_add_sldMasterIdLst() + if used_ids is None: + used_ids = set() + for sm in sldMasterIdLst.sldMasterId_lst: + raw = sm.get("id") + if raw is not None: + with contextlib.suppress(ValueError): + used_ids.add(int(raw)) + next_id = max(used_ids | {_OOXML_LAYOUT_ID_FLOOR - 1}) + 1 + sldMasterId = sldMasterIdLst._add_sldMasterId() + sldMasterId.rId = rId + sldMasterId.set("id", str(next_id)) + used_ids.add(next_id) + + +def _add_sldLayoutId_to_master(master_part, rId: str) -> None: + """Append a `` to the master, allocating an id.""" + sldLayoutIdLst = master_part._element.get_or_add_sldLayoutIdLst() + used_ids: list[int] = [] + for sl in sldLayoutIdLst.sldLayoutId_lst: + raw = sl.get("id") + if raw is not None: + with contextlib.suppress(ValueError): + used_ids.append(int(raw)) + next_id = max(used_ids + [2147483647]) + 1 + sldLayoutId = sldLayoutIdLst._add_sldLayoutId() + sldLayoutId.rId = rId + sldLayoutId.set("id", str(next_id)) + + +# SlideLayoutPart / SlideMasterPart are defined further down in this same +# module. Method bodies above resolve those names lazily via the module's +# globals at call time, so no forward declaration is needed. + + def _duplicate_chart_part(src: ChartPart) -> ChartPart: """Return a new ChartPart cloning `src`. diff --git a/src/pptx/presentation.py b/src/pptx/presentation.py index 94fa3fcb5..0ffae2816 100644 --- a/src/pptx/presentation.py +++ b/src/pptx/presentation.py @@ -2,8 +2,9 @@ from __future__ import annotations -from typing import IO, TYPE_CHECKING, cast +from typing import IO, TYPE_CHECKING, Iterable, cast +from pptx.opc.constants import RELATIONSHIP_TYPE as RT from pptx.shared import PartElementProxy from pptx.slide import SlideMasters, Slides from pptx.util import lazyproperty @@ -13,7 +14,7 @@ from pptx.custom_xml import CustomXmlParts from pptx.oxml.presentation import CT_Presentation, CT_SlideId from pptx.parts.presentation import PresentationPart - from pptx.slide import NotesMaster, SlideLayouts + from pptx.slide import NotesMaster, Slide, SlideLayouts from pptx.util import Length @@ -134,3 +135,73 @@ def slides(self): sldIdLst = self._element.get_or_add_sldIdLst() self.part.rename_slide_parts([cast("CT_SlideId", sldId).rId for sldId in sldIdLst]) return Slides(sldIdLst, self) + + def append_from( + self, + other_pres: Presentation, + slide_indexes: Iterable[int] | None = None, + ) -> list[Slide]: + """Append slides from `other_pres` onto the end of this presentation. + + When `slide_indexes` is |None| (the default), every slide of + `other_pres` is appended in source order. When `slide_indexes` is + an iterable of zero-based ints, only those slides are appended + (in the iteration order, allowing reordering via index sequence). + + Each appended slide brings along — into this presentation's + package — its own deep-copied ```` element, its + slide-layout, the slide-master that layout belongs to (with + ALL of that master's layouts, to keep the master's layout tree + intact), the master's theme, and any per-slide private parts + (chart, OLE-object, notes-slide). Image and embedded-media + parts dedupe at the target package by SHA1 of bytes — an image + already present in this presentation's package is reused, not + copied. + + Within a single ``append_from`` call, source slide-masters + (and their layouts and themes) are ported exactly once even + when shared by multiple appended slides. Two consecutive + ``append_from`` calls do NOT share that cache, so calling + twice in succession ports the master twice — pass all wanted + slides in a single call to keep the part graph compact. + + Comments parts on the source slides are dropped (consistent + with Phase 2 of issue #11). Notes-slides on the source bring + their own deep-copied notes-slide part; the notes-master is + this presentation's existing notes-master, NOT a port of + the source's. + + Returns the list of newly-added |Slide| objects in insertion + order — same length as the number of slides actually appended. + + Raises |IndexError| if any value in `slide_indexes` is out of + range for ``other_pres.slides``. + """ + from pptx.parts.slide import _PortContext, duplicate_notes_slide_for # noqa: F401 + + if slide_indexes is None: + source_slides = list(other_pres.slides) + else: + indexes = list(slide_indexes) + n_source = len(other_pres.slides) + for idx in indexes: + if idx < 0 or idx >= n_source: + raise IndexError("slide index out of range") + source_slides = [other_pres.slides[i] for i in indexes] + + if not source_slides: + return [] + + ctx = _PortContext(self.part) + sldIdLst = self._element.get_or_add_sldIdLst() + new_slides: list[Slide] = [] + + for src_slide in source_slides: + new_slide_part = ctx.port_slide(src_slide.part) + new_rId = self.part.relate_to(new_slide_part, RT.SLIDE) + sldIdLst.add_sldId(new_rId) + if src_slide.part.has_notes_slide: + ctx.port_notes_slide(src_slide.part, new_slide_part) + new_slides.append(new_slide_part.slide) + + return new_slides diff --git a/tests/test_append_from.py b/tests/test_append_from.py new file mode 100644 index 000000000..32ac46a8f --- /dev/null +++ b/tests/test_append_from.py @@ -0,0 +1,648 @@ +# pyright: reportPrivateUsage=false + +"""Unit-test suite for `Presentation.append_from` (slide-CRUD Phase 3). + +Issue: https://github.com/MHoroszowski/python-pptx/issues/11 (Phase 3 — cross-deck copy). + +Implements the slide-CRUD epic's append_from sub-feature: copy slides between +TWO different python-pptx Presentation instances, with cross-package porting +of slide-layout / slide-master / theme and image-dedup at the target package. +""" + +from __future__ import annotations + +import io +from pathlib import Path + +import pytest + +from pptx import Presentation +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from pptx.parts.image import ImagePart +from pptx.parts.slide import SlideMasterPart, SlidePart +from pptx.util import Inches + +_PNG_PATH = Path(__file__).parent / "test_files" / "python-powered.png" + + +# --------------------------------------------------------------------------- +# Helpers — keep round-trip plumbing identical to test_slide_crud / +# test_slide_duplicate. +# --------------------------------------------------------------------------- + + +def _seed_presentation_with(n_slides: int) -> Presentation: + prs = Presentation() + layout = prs.slide_layouts[6] + for i in range(n_slides): + slide = prs.slides.add_slide(layout) + slide.shapes.add_textbox(Inches(1), Inches(1), Inches(2), Inches(1)).text_frame.text = ( + "slide %d" % i + ) + return prs + + +def _round_trip(prs: Presentation) -> Presentation: + buf = io.BytesIO() + prs.save(buf) + buf.seek(0) + return Presentation(buf) + + +def _image_part_count(prs: Presentation) -> int: + return sum(1 for p in prs.part.package.iter_parts() if isinstance(p, ImagePart)) + + +def _slide_part_count(prs: Presentation) -> int: + return sum(1 for p in prs.part.package.iter_parts() if isinstance(p, SlidePart)) + + +def _master_part_count(prs: Presentation) -> int: + return sum(1 for p in prs.part.package.iter_parts() if isinstance(p, SlideMasterPart)) + + +# --------------------------------------------------------------------------- +# API surface. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_API(object): + """Argument validation, signature, and return shape.""" + + def it_exposes_append_from_on_Presentation(self): + prs = Presentation() + assert callable(getattr(prs, "append_from", None)) + + def it_returns_a_list_of_Slide_objects(self): + from pptx.slide import Slide + + src = _seed_presentation_with(2) + tgt = Presentation() + + result = tgt.append_from(src) + + assert isinstance(result, list) + assert all(isinstance(s, Slide) for s in result) + assert len(result) == 2 + + def it_appends_all_source_slides_when_slide_indexes_is_None(self): + src = _seed_presentation_with(3) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + + tgt.append_from(src) + + assert len(tgt.slides) == 4 + + def it_appends_only_selected_indexes_when_slide_indexes_provided(self): + src = _seed_presentation_with(3) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + + tgt.append_from(src, slide_indexes=[0, 2]) + + assert len(tgt.slides) == 3 + + def it_preserves_source_index_order_in_appended_slides(self): + src = _seed_presentation_with(3) + tgt = Presentation() + + new_slides = tgt.append_from(src, slide_indexes=[2, 0]) + + # Verify by text content of the textbox we seeded. + def _first_text(slide): + return next( + shp.text_frame.text for shp in slide.shapes if getattr(shp, "has_text_frame", False) + ) + + assert _first_text(new_slides[0]) == "slide 2" + assert _first_text(new_slides[1]) == "slide 0" + + @pytest.mark.parametrize("bad_idx", [-1, 99]) + def it_raises_IndexError_for_out_of_range_index(self, bad_idx: int): + src = _seed_presentation_with(2) + tgt = Presentation() + with pytest.raises(IndexError): + tgt.append_from(src, slide_indexes=[0, bad_idx]) + + def it_treats_empty_slide_indexes_as_a_noop(self): + src = _seed_presentation_with(3) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + slide_count_before = len(tgt.slides) + + result = tgt.append_from(src, slide_indexes=[]) + + assert result == [] + assert len(tgt.slides) == slide_count_before + + def it_supports_self_append_from_self(self): + """Edge case — appending from self to self acts like multi-duplicate.""" + prs = _seed_presentation_with(2) + slide_count_before = len(prs.slides) + + new_slides = prs.append_from(prs, slide_indexes=[0]) + + assert len(prs.slides) == slide_count_before + 1 + assert len(new_slides) == 1 + + +# --------------------------------------------------------------------------- +# Cross-package porting: per-slide. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_PerSlide(object): + """Each appended slide gets its own deep-copied part.""" + + def it_grows_target_slide_part_count_by_the_number_of_appended_slides(self): + src = _seed_presentation_with(3) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + n_before = _slide_part_count(tgt) + + tgt.append_from(src) + + assert _slide_part_count(tgt) == n_before + 3 + + def it_does_not_mutate_source_slide_count(self): + """Anti — source presentation must be unchanged after append.""" + src = _seed_presentation_with(2) + tgt = Presentation() + src_slide_count_before = len(src.slides) + + tgt.append_from(src) + + assert len(src.slides) == src_slide_count_before + + def it_assigns_unique_partnames_across_appended_slides(self): + src = _seed_presentation_with(3) + tgt = Presentation() + + tgt.append_from(src) + + partnames = [s.part.partname for s in tgt.slides] + assert len(set(partnames)) == len(partnames) + + def it_assigns_unique_slide_ids_across_appended_slides(self): + src = _seed_presentation_with(2) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + + tgt.append_from(src) + + ids = [s.slide_id for s in tgt.slides] + assert len(set(ids)) == len(ids) + + def it_isolates_modifications_to_appended_slides_from_source(self): + src = _seed_presentation_with(1) + tgt = Presentation() + + new_slide = tgt.append_from(src)[0] + textbox = next(shp for shp in new_slide.shapes if getattr(shp, "has_text_frame", False)) + textbox.text_frame.text = "MUTATED" + + src_text = next( + shp.text_frame.text + for shp in src.slides[0].shapes + if getattr(shp, "has_text_frame", False) + ) + assert src_text == "slide 0" + + +# --------------------------------------------------------------------------- +# Slide-master / slide-layout / theme porting. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_MasterPort(object): + """Source's master + layouts get ported into the target package.""" + + def it_adds_a_new_master_to_target_when_appending_from_a_separate_pres(self): + src = _seed_presentation_with(1) + tgt = Presentation() + n_masters_before = _master_part_count(tgt) + + tgt.append_from(src) + + assert _master_part_count(tgt) == n_masters_before + 1 + + def it_dedups_master_within_a_single_call_for_slides_sharing_a_master(self): + """Two source slides on same master → one ported master in target.""" + src = _seed_presentation_with(3) # all three share src's single master + tgt = Presentation() + + tgt.append_from(src) + + # target had 1 master, source has 1 master → after port, target has 2 + assert _master_part_count(tgt) == 2 + + def it_does_not_dedup_master_across_separate_calls(self): + """Two consecutive append_from calls re-port the master.""" + src = _seed_presentation_with(1) + tgt = Presentation() + + tgt.append_from(src) + tgt.append_from(src) + + # target started at 1, +1 per call = 3 + assert _master_part_count(tgt) == 3 + + def it_appends_a_new_sldMasterId_entry_to_target_presentation(self): + src = _seed_presentation_with(1) + tgt = Presentation() + n_master_id_entries_before = len( + tgt.part._element.get_or_add_sldMasterIdLst().sldMasterId_lst + ) + + tgt.append_from(src) + + n_master_id_entries_after = len( + tgt.part._element.get_or_add_sldMasterIdLst().sldMasterId_lst + ) + assert n_master_id_entries_after == n_master_id_entries_before + 1 + + def it_assigns_a_unique_master_id_to_the_new_sldMasterId_entry(self): + src = _seed_presentation_with(1) + tgt = Presentation() + + tgt.append_from(src) + + ids = [sm.get("id") for sm in tgt.part._element.get_or_add_sldMasterIdLst().sldMasterId_lst] + assert all(i is not None for i in ids) + assert len(set(ids)) == len(ids) + + def it_ports_all_layouts_of_the_source_master(self): + """Master's layout tree is ported intact, not just the referenced one.""" + src = _seed_presentation_with(1) + n_src_layouts = len(src.slide_master.slide_layouts) + tgt = Presentation() + n_tgt_layouts_before = sum(len(m.slide_layouts) for m in tgt.slide_masters) + + tgt.append_from(src) + + n_tgt_layouts_after = sum(len(m.slide_layouts) for m in tgt.slide_masters) + assert n_tgt_layouts_after == n_tgt_layouts_before + n_src_layouts + + +# --------------------------------------------------------------------------- +# Image / media dedup at target package. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_ImageDedup(object): + """Cross-package SHA1 dedup: images shared by source AND target unify.""" + + def it_dedups_a_source_image_already_present_in_target(self): + """Same PNG used in both decks → one ImagePart in target after append.""" + src = Presentation() + s1 = src.slides.add_slide(src.slide_layouts[6]) + s1.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + + tgt = Presentation() + t1 = tgt.slides.add_slide(tgt.slide_layouts[6]) + t1.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + n_images_before = _image_part_count(tgt) + + tgt.append_from(src) + + # Image IS the same blob → SHA1 dedup → no new ImagePart in target. + assert _image_part_count(tgt) == n_images_before + + def it_dedups_within_a_single_call_when_two_source_slides_share_an_image(self): + src = Presentation() + s1 = src.slides.add_slide(src.slide_layouts[6]) + s1.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + s2 = src.slides.add_slide(src.slide_layouts[6]) + s2.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + + tgt = Presentation() + n_images_before = _image_part_count(tgt) + + tgt.append_from(src) + + # Both source slides referenced the same blob; target gets ONE new ImagePart. + assert _image_part_count(tgt) == n_images_before + 1 + + def it_round_trips_dedup_through_save_and_reopen(self): + src = Presentation() + s1 = src.slides.add_slide(src.slide_layouts[6]) + s1.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + + tgt = Presentation() + t1 = tgt.slides.add_slide(tgt.slide_layouts[6]) + t1.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + + tgt.append_from(src) + reopened = _round_trip(tgt) + + # Both slides in reopened present the same PNG bytes via shared ImagePart. + slides_with_pictures = [ + [shp for shp in s.shapes if shp.shape_type == 13] for s in reopened.slides + ] + assert all(len(pics) == 1 for pics in slides_with_pictures) + blobs = [pics[0].image.blob for pics in slides_with_pictures] + assert blobs[0] == blobs[1] + + +# --------------------------------------------------------------------------- +# Notes-slide handling. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_NotesSlide(object): + """Notes-slide gets its own copy; back-rel rewired to new slide; notes-master shared.""" + + def it_gives_appended_slide_its_own_notes_slide_part(self): + src = _seed_presentation_with(1) + src.slides[0].notes_slide.notes_text_frame.text = "speaker notes" + tgt = Presentation() + + new_slide = tgt.append_from(src)[0] + + assert new_slide.has_notes_slide is True + assert new_slide.notes_slide.part is not src.slides[0].notes_slide.part + + def it_carries_notes_text_to_the_appended_slide(self): + src = _seed_presentation_with(1) + src.slides[0].notes_slide.notes_text_frame.text = "speaker notes" + tgt = Presentation() + + new_slide = tgt.append_from(src)[0] + + assert new_slide.notes_slide.notes_text_frame.text == "speaker notes" + + def it_rewires_the_notes_back_rel_to_the_new_slide(self): + """Cross-package gotcha #961: notes-slide's RT.SLIDE points at NEW slide.""" + src = _seed_presentation_with(1) + src.slides[0].notes_slide.notes_text_frame.text = "x" + tgt = Presentation() + + new_slide = tgt.append_from(src)[0] + + new_notes_part = new_slide.notes_slide.part + back_ref_target = new_notes_part.part_related_by(RT.SLIDE) + assert back_ref_target is new_slide.part + + def it_uses_targets_existing_notes_master_not_a_port_of_source(self): + """Notes-master is presentation-singleton — target's existing one is reused.""" + src = _seed_presentation_with(1) + src.slides[0].notes_slide.notes_text_frame.text = "x" + tgt = Presentation() + # ---force-create target notes-master before append--- + target_notes_master_part = tgt.part.notes_master_part + + new_slide = tgt.append_from(src)[0] + + new_notes_master = new_slide.notes_slide.part.part_related_by(RT.NOTES_MASTER) + assert new_notes_master is target_notes_master_part + + def it_does_not_create_a_notes_slide_when_source_has_none(self): + src = _seed_presentation_with(1) + # no notes-slide on source + assert src.slides[0].part.has_notes_slide is False + tgt = Presentation() + + new_slide = tgt.append_from(src)[0] + + assert new_slide.has_notes_slide is False + + +# --------------------------------------------------------------------------- +# Round-trip integration. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_RoundTrip(object): + """Open → append_from → save → reopen integration coverage.""" + + def it_round_trips_a_basic_append_all(self): + src = _seed_presentation_with(2) + tgt = Presentation() + tgt.slides.add_slide(tgt.slide_layouts[6]) + + tgt.append_from(src) + reopened = _round_trip(tgt) + + assert len(reopened.slides) == 3 + + def it_round_trips_selective_append(self): + src = _seed_presentation_with(3) + tgt = Presentation() + + tgt.append_from(src, slide_indexes=[2, 0]) + reopened = _round_trip(tgt) + + # Verify by text content + texts = [] + for s in reopened.slides: + for shp in s.shapes: + if getattr(shp, "has_text_frame", False): + texts.append(shp.text_frame.text) + break + else: + texts.append(None) + assert texts == ["slide 2", "slide 0"] + + def it_round_trips_appended_slide_with_an_image(self): + src = Presentation() + s = src.slides.add_slide(src.slide_layouts[6]) + s.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + + tgt = Presentation() + tgt.append_from(src) + reopened = _round_trip(tgt) + + pics = [shp for shp in reopened.slides[0].shapes if shp.shape_type == 13] + assert len(pics) == 1 + assert len(pics[0].image.blob) > 0 + + def it_round_trips_appended_slide_with_notes(self): + src = _seed_presentation_with(1) + src.slides[0].notes_slide.notes_text_frame.text = "round-trip notes" + + tgt = Presentation() + tgt.append_from(src) + reopened = _round_trip(tgt) + + assert reopened.slides[0].has_notes_slide is True + assert reopened.slides[0].notes_slide.notes_text_frame.text == "round-trip notes" + + def it_preserves_master_count_through_round_trip(self): + src = _seed_presentation_with(1) + tgt = Presentation() + + tgt.append_from(src) + masters_before = _master_part_count(tgt) + reopened = _round_trip(tgt) + + assert _master_part_count(reopened) == masters_before + + +# --------------------------------------------------------------------------- +# Anti-criteria. +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_Antis(object): + """Anti-criteria: properties that MUST NOT change after append_from.""" + + def it_does_not_mutate_source_image_part_count(self): + src = Presentation() + s = src.slides.add_slide(src.slide_layouts[6]) + s.shapes.add_picture(str(_PNG_PATH), Inches(1), Inches(1)) + n_src_images_before = _image_part_count(src) + + tgt = Presentation() + tgt.append_from(src) + + assert _image_part_count(src) == n_src_images_before + + def it_does_not_mutate_source_master_count(self): + src = _seed_presentation_with(1) + n_src_masters_before = _master_part_count(src) + + tgt = Presentation() + tgt.append_from(src) + + assert _master_part_count(src) == n_src_masters_before + + def it_does_not_drop_existing_target_slides(self): + src = _seed_presentation_with(1) + tgt = _seed_presentation_with(2) + existing_ids = [s.slide_id for s in tgt.slides] + + tgt.append_from(src) + + ids_after = [s.slide_id for s in tgt.slides] + for prior_id in existing_ids: + assert prior_id in ids_after + + def it_does_not_carry_comments_through_an_append(self): + """Phase 3 scope: comments rels are dropped, same as Phase 2.""" + src = _seed_presentation_with(1) + tgt = Presentation() + + new_slide = tgt.append_from(src)[0] + + with pytest.raises(KeyError): + new_slide.part.part_related_by(RT.COMMENTS) + + +# --------------------------------------------------------------------------- +# PowerPoint round-trip integrity (uniqueness of OOXML-spec ID attrs). +# --------------------------------------------------------------------------- + + +class DescribePresentation_AppendFrom_OOXMLIntegrity(object): + """PowerPoint flags 'bad content / repair' on collisions across masters. + + Two distinct collision classes appear when porting a master from a + source presentation that was built from the same default template as + the target: + + 1. ```` — the deepcopy carries source's layout + ids verbatim, which match target's already-existing master's ids. + 2. ```` — the default ``default.pptx`` ships + with a deterministic creationId on every master and layout, so + both masters and every parallel layout end up with identical vals. + + Both surface only after a save -> reopen, hence the zip-level checks. + """ + + def _save_to_zip(self, prs): + import zipfile + + buf = io.BytesIO() + prs.save(buf) + buf.seek(0) + return zipfile.ZipFile(buf) + + def it_assigns_unique_sldLayoutId_values_across_masters(self): + import re + + src = _seed_presentation_with(1) + tgt = Presentation() + + tgt.append_from(src) + + with self._save_to_zip(tgt) as zf: + all_ids: list[str] = [] + for n in zf.namelist(): + if not (n.startswith("ppt/slideMasters/slideMaster") and n.endswith(".xml")): + continue + xml = zf.read(n).decode() + all_ids.extend(re.findall(r']*val="([^"]+)"', xml)) + + assert len(master_creation_ids) == len(set(master_creation_ids)), ( + "duplicate master creationId: %r" % master_creation_ids + ) + + def it_does_not_duplicate_p14_creationId_across_layouts(self): + import re + + src = _seed_presentation_with(1) + tgt = Presentation() + + tgt.append_from(src) + + with self._save_to_zip(tgt) as zf: + layout_creation_ids: list[str] = [] + for n in zf.namelist(): + if not (n.startswith("ppt/slideLayouts/slideLayout") and n.endswith(".xml")): + continue + xml = zf.read(n).decode() + layout_creation_ids.extend(re.findall(r'creationId[^>]*val="([^"]+)"', xml)) + + assert len(layout_creation_ids) == len(set(layout_creation_ids)), ( + "duplicate layout creationId: %r" % layout_creation_ids + )