Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions features/sld-duplicate.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Feature: Slide duplicate — Slides.duplicate, Slide.duplicate
In order to programmatically clone slides without lxml hacks
As a developer using python-pptx
I need a single API call that duplicates a slide and inserts it at a chosen position


Scenario: Slides.duplicate(slide) inserts the copy after the source
Given a Slides object containing 3 slides
When I call slides.duplicate(slides[0])
Then len(slides) is 4
And the duplicate is at index 1
And the source slide is still at index 0


Scenario: Slides.duplicate(slide, index=N) inserts the copy at index N
Given a Slides object containing 3 slides
When I call slides.duplicate(slides[0], index=3)
Then len(slides) is 4
And the duplicate is at index 3


Scenario: Slide.duplicate() returns a Slide with a new unique slide_id
Given a Slides object containing 3 slides
When I call slides[1].duplicate()
Then len(slides) is 4
And the duplicate slide_id is unique


Scenario: Slides.duplicate raises IndexError for an out-of-range index
Given a Slides object containing 3 slides
Then calling slides.duplicate(slides[0], index=99) raises IndexError
44 changes: 44 additions & 0 deletions features/steps/slides.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@ def when_I_call_slide_delete(context):
context.slides[1].delete()


@when("I call slides.duplicate(slides[0])")
def when_I_call_slides_duplicate_default_index(context):
    """Duplicate the first slide with no explicit index; keep the result on `context`."""
    slides = context.slides
    first_slide = slides[0]
    context.new_slide = slides.duplicate(first_slide)


@when("I call slides.duplicate(slides[0], index=3)")
def when_I_call_slides_duplicate_index_3(context):
    """Duplicate the first slide at explicit index 3; keep the result on `context`."""
    slides = context.slides
    first_slide = slides[0]
    context.new_slide = slides.duplicate(first_slide, index=3)


@when("I call slides[1].duplicate()")
def when_I_call_slide_duplicate_alias(context):
    """Duplicate the second slide via the slide-level method; keep the result."""
    second_slide = context.slides[1]
    context.new_slide = second_slide.duplicate()


# then ====================================================


Expand Down Expand Up @@ -192,3 +207,32 @@ def then_surviving_slide_order_matches_0_2(context):
expected = [o[0], o[2]]
actual = [s.slide_id for s in context.slides]
assert actual == expected, "expected %r, got %r" % (expected, actual)


@then("the duplicate is at index {idx:d}")
def then_the_duplicate_is_at_index(context, idx):
    """Check that the slide found at `idx` has the slide_id of the stored duplicate."""
    found_id = context.slides[idx].slide_id
    wanted_id = context.new_slide.slide_id
    message = "expected duplicate at index %d, got slide_id mismatch" % idx
    assert found_id == wanted_id, message


@then("the source slide is still at index 0")
def then_source_slide_still_at_index_0(context):
    """Check that the slide at index 0 still carries the first recorded slide_id."""
    current_first_id = context.slides[0].slide_id
    recorded_first_id = context.original_slide_ids[0]
    assert current_first_id == recorded_first_id, "source slide moved off index 0"


@then("the duplicate slide_id is unique")
def then_duplicate_slide_id_is_unique(context):
    """Check that the duplicate's slide_id is not one of the recorded original ids."""
    duplicate_id = context.new_slide.slide_id
    collides = duplicate_id in context.original_slide_ids
    assert not collides, "duplicate slide_id collides with an existing slide"


@then("calling slides.duplicate(slides[0], index=99) raises IndexError")
def then_duplicate_index_99_raises(context):
    """Assert that duplicating with out-of-range `index=99` raises |IndexError|.

    Uses a plain try/except so the step reports a missing exception as an
    ordinary `AssertionError` and needs no test-framework import. Any
    exception other than |IndexError| propagates unchanged.
    """
    try:
        context.slides.duplicate(context.slides[0], index=99)
    except IndexError:
        # ---expected outcome; the step passes---
        return
    raise AssertionError(
        "expected slides.duplicate(slides[0], index=99) to raise IndexError"
    )
171 changes: 170 additions & 1 deletion src/pptx/parts/slide.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

from __future__ import annotations

import copy
import re
from typing import IO, TYPE_CHECKING, cast

from pptx.enum.shapes import PROG_ID
from pptx.opc.constants import CONTENT_TYPE as CT
from pptx.opc.constants import RELATIONSHIP_TYPE as RT
from pptx.opc.package import XmlPart
from pptx.opc.package import Part, XmlPart
from pptx.opc.packuri import PackURI
from pptx.oxml.slide import CT_NotesMaster, CT_NotesSlide, CT_Slide
from pptx.oxml.theme import CT_OfficeStyleSheet
Expand Down Expand Up @@ -259,6 +261,173 @@ def _add_notes_slide_part(self):
self.relate_to(notes_slide_part, RT.NOTES_SLIDE)
return notes_slide_part

def duplicate(self) -> SlidePart:
    """Return a new |SlidePart| whose XML is a deep copy of this part's XML.

    Relationships are rebuilt on the new part by
    `_replicate_rels_for_duplicate`: chart, OLE-object and
    embedded-package targets get their own copies, while every other
    internal target (image, media, layout, ...) is shared with the
    source. Notes-slide and comments relationships are skipped; the
    caller is expected to wire a notes slide separately. The rId
    attributes in the copied XML are then rewritten to the new rIds.
    """
    package = self._package
    clone_partname = package.next_partname("/ppt/slides/slide%d.xml")
    clone_element = copy.deepcopy(self._element)
    clone_part = SlidePart(clone_partname, CT.PML_SLIDE, package, clone_element)

    # ---rels first (yields old->new rId map), then patch the copied XML---
    _remap_rId_attrs(clone_element, _replicate_rels_for_duplicate(self, clone_part))

    return clone_part


# ---------------------------------------------------------------------------
# Module-level helpers for slide / slide-private part duplication.
# ---------------------------------------------------------------------------

# Namespace prefix, in `{uri}` form, of attribute names that belong to the
# OOXML relationships namespace. `_remap_rId_attrs` matches attribute names
# against this prefix with `str.startswith`.
_RELS_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"

# Reltypes filtered out during slide duplication. NOTES_SLIDE is wired
# explicitly by |Slides.duplicate| so the new notes-slide back-references
# the new parent slide. Comments are dropped — Phase 2 scope (issue #11).
_DUP_DROP_RELTYPES_SLIDE = frozenset({RT.NOTES_SLIDE, RT.COMMENTS, RT.COMMENT_AUTHORS})


def _replicate_rels_for_duplicate(src_part: Part, new_part: Part) -> dict[str, str]:
    """Recreate `src_part`'s relationships on `new_part`.

    Relationships whose reltype is in `_DUP_DROP_RELTYPES_SLIDE` are
    skipped. External relationships are re-added as external. Chart
    targets are cloned with `_duplicate_chart_part`; OLE-object and
    package targets are cloned with `_duplicate_blob_part`. Every other
    internal target is related to as-is, so it is shared by both parts.

    Returns a `{old_rId: new_rId}` mapping covering each replicated
    relationship, suitable for `_remap_rId_attrs`.
    """

    def _mirror(rel) -> str:
        """Add the counterpart of `rel` to `new_part` and return its rId."""
        reltype = rel.reltype
        if rel.is_external:
            return new_part.relate_to(rel.target_ref, reltype, is_external=True)
        if reltype == RT.CHART:
            target = _duplicate_chart_part(cast(ChartPart, rel.target_part))
        elif reltype in (RT.OLE_OBJECT, RT.PACKAGE):
            target = _duplicate_blob_part(cast(Part, rel.target_part))
        else:
            # ---shared target: image, media, layout and the like---
            target = rel.target_part
        return new_part.relate_to(target, reltype)

    return {
        old_rId: _mirror(rel)
        for old_rId, rel in src_part.rels.items()
        if rel.reltype not in _DUP_DROP_RELTYPES_SLIDE
    }


def _remap_rId_attrs(element, rId_map: dict[str, str]) -> None:
    """Rewrite relationship-id attribute values throughout `element` in place.

    Visits `element` and all of its descendants. Any attribute whose
    name starts with the relationships-namespace prefix (`_RELS_NS`)
    and whose current value is a key of `rId_map` is set to the mapped
    value. Attributes with values absent from `rId_map` are untouched.
    """
    for node in element.iter():
        attrs = node.attrib
        # ---snapshot the matching names before assigning into `attrs`---
        rel_attr_names = [name for name in attrs if name.startswith(_RELS_NS)]
        for name in rel_attr_names:
            current = attrs[name]
            if current not in rId_map:
                continue
            attrs[name] = rId_map[current]


def _duplicate_chart_part(src: ChartPart) -> ChartPart:
    """Return a new chart part, of the same class as `src`, cloning `src`.

    The chart XML is deep-copied. Relationships are rebuilt on the
    clone: external ones are re-added as external, `RT.PACKAGE` targets
    (the embedded data the chart points at) are blob-copied with
    `_duplicate_blob_part`, and any other internal target is shared
    with `src`. rId attributes in the copied XML are then remapped.
    """
    pkg = src._package
    clone_partname = pkg.next_partname("/ppt/charts/chart%d.xml")
    chart_xml = copy.deepcopy(src._element)
    clone = type(src)(clone_partname, src.content_type, pkg, chart_xml)

    remap: dict[str, str] = {}
    for old_rId, rel in src.rels.items():
        if rel.is_external:
            remap[old_rId] = clone.relate_to(rel.target_ref, rel.reltype, is_external=True)
            continue
        # ---only the embedded package is copied; other targets are shared---
        target = (
            _duplicate_blob_part(cast(Part, rel.target_part))
            if rel.reltype == RT.PACKAGE
            else rel.target_part
        )
        remap[old_rId] = clone.relate_to(target, rel.reltype)

    _remap_rId_attrs(chart_xml, remap)
    return clone


def _duplicate_blob_part(src: Part) -> Part:
    """Return a new part of `src`'s class carrying the same blob and content type.

    The new partname comes from the class's `partname_template`
    attribute when it has one (and it is not |None|); otherwise a
    template is derived from `src`'s own partname with
    `_derive_partname_template`.
    """
    part_cls = type(src)
    template = getattr(part_cls, "partname_template", None)
    if template is None:
        template = _derive_partname_template(str(src.partname))

    pkg = src._package
    clone_partname = pkg.next_partname(template)
    return part_cls(clone_partname, src.content_type, pkg, src.blob)


def _derive_partname_template(partname: str) -> str:
    """Derive a `next_partname`-compatible template from an existing partname.

    Replaces the trailing integer (just before the final extension) with
    `%d`. When there is no trailing digit run, `%d` is inserted
    immediately before the extension of the final path segment, or
    appended when the final segment has no extension.

    Any literal `%` already present in `partname` is doubled so that
    applying the returned template with the `%` operator reproduces it
    rather than treating it as a format directive.
    """
    # ---escape literal percent signs first; the template is consumed by
    # ---printf-style formatting, where a bare `%` starts a directive---
    escaped = partname.replace("%", "%%")

    match = re.match(r"^(.*?)(\d+)(\.[^./]+)$", escaped)
    if match:
        prefix, _, ext = match.groups()
        return f"{prefix}%d{ext}"

    # ---no trailing-digit pattern; only a dot in the FINAL path segment
    # ---counts as an extension separator, never one in a directory name---
    last_slash = escaped.rfind("/")
    last_dot = escaped.rfind(".")
    if last_dot <= last_slash:
        return f"{escaped}%d"
    return f"{escaped[:last_dot]}%d{escaped[last_dot:]}"


def duplicate_notes_slide_for(
    src_slide_part: SlidePart, new_slide_part: SlidePart
) -> NotesSlidePart:
    """Give `new_slide_part` its own notes slide cloned from the source's.

    The notes-slide XML of `src_slide_part` is deep-copied into a new
    |NotesSlidePart|. Its relationships are rebuilt so that the
    `RT.SLIDE` back-reference targets `new_slide_part` rather than the
    source slide (the notes-pointing-at-the-source problem described
    as upstream gotcha #961); external relationships are re-added as
    external and every other internal target is shared. Finally
    `new_slide_part` is related to the new notes part via
    `RT.NOTES_SLIDE`, and the new notes part is returned.

    `src_slide_part` must already have a notes-slide relationship; it
    is looked up unconditionally.
    """
    source_notes = cast(NotesSlidePart, src_slide_part.part_related_by(RT.NOTES_SLIDE))
    pkg = src_slide_part._package
    clone_partname = pkg.next_partname("/ppt/notesSlides/notesSlide%d.xml")
    notes_xml = copy.deepcopy(source_notes._element)
    notes_clone = NotesSlidePart(clone_partname, CT.PML_NOTES_SLIDE, pkg, notes_xml)

    def _mirror(rel) -> str:
        """Add the counterpart of `rel` to the cloned notes part; return its rId."""
        if rel.is_external:
            return notes_clone.relate_to(rel.target_ref, rel.reltype, is_external=True)
        if rel.reltype == RT.SLIDE:
            # ---back-reference must point at the NEW slide part---
            return notes_clone.relate_to(new_slide_part, RT.SLIDE)
        # ---notes master and anything else: shared at package level---
        return notes_clone.relate_to(rel.target_part, rel.reltype)

    remap = {old_rId: _mirror(rel) for old_rId, rel in source_notes.rels.items()}
    _remap_rId_attrs(notes_xml, remap)

    new_slide_part.relate_to(notes_clone, RT.NOTES_SLIDE)
    return notes_clone


class SlideLayoutPart(BaseSlidePart):
"""Slide layout part.
Expand Down
63 changes: 60 additions & 3 deletions src/pptx/slide.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pptx.dml.fill import FillFormat
from pptx.enum.shapes import PP_PLACEHOLDER
from pptx.opc.constants import RELATIONSHIP_TYPE as RT
from pptx.shapes.shapetree import (
LayoutPlaceholders,
LayoutShapes,
Expand Down Expand Up @@ -259,6 +260,19 @@ def delete(self) -> None:
prs = self.part.package.presentation_part.presentation
prs.slides.remove(self)

def duplicate(self, index: int | None = None) -> Slide:
    """Add a deep copy of this slide to its presentation and return the copy.

    Thin convenience wrapper: looks up the slide collection of the
    presentation this slide belongs to and delegates to
    :meth:`Slides.duplicate`, passing `index` through unchanged. With
    `index` left as |None| the copy lands immediately after this slide.

    See :meth:`Slides.duplicate` for the full contract, including which
    parts are shared versus copied and which exceptions are raised.
    """
    presentation = self.part.package.presentation_part.presentation
    slides = presentation.slides
    return slides.duplicate(self, index)


class Slides(ParentedElementProxy):
"""Sequence of slides belonging to an instance of |Presentation|.
Expand Down Expand Up @@ -300,9 +314,9 @@ def add_slide(self, slide_layout: SlideLayout, index: int | None = None) -> Slid
are not supported; pass an explicit position. Raises |IndexError| if
`index` is out of range (negative, or greater than `len(self)`).

Companion operations: :meth:`remove`, :meth:`move`. Cross-deck copy
(``Presentation.append_from``) and ``Slide.duplicate`` are tracked
under issue #11 (Phase 2/3) and not yet implemented.
Companion operations: :meth:`remove`, :meth:`move`,
:meth:`duplicate`. Cross-deck copy (``Presentation.append_from``)
is tracked under issue #11 (Phase 3) and not yet implemented.
"""
# ---validate index BEFORE creating the new SlidePart so a bad index
# does not leak a partial part into the package---
Expand Down Expand Up @@ -368,6 +382,49 @@ def remove(self, slide: Slide) -> None:
self._sldIdLst.remove_sldId(sldId)
self.part.drop_rel(target_rId)

def duplicate(self, slide: Slide, index: int | None = None) -> Slide:
    """Insert a deep copy of `slide` into this collection and return it.

    `index` is the zero-based position for the copy. Left as |None|
    (the default), the copy goes to ``source_index + 1``, directly
    after `slide`. Passing ``len(self)`` appends. Negative indices are
    not supported.

    Parts such as images, media and the slide layout are shared between
    source and copy, so duplicating does not add image parts. Chart
    parts, OLE-object parts and the notes slide (when the source has
    one) are copied so that editing the duplicate leaves the source
    untouched. Comments relationships are not carried over — deferred
    to a later phase of issue #11.

    Raises |ValueError| if `slide` is not a member of this collection.
    Raises |IndexError| if `index` is negative or greater than
    `len(self)`.
    """
    from pptx.parts.slide import duplicate_notes_slide_for

    # ---membership and position are both checked before any part is
    # ---created, so a bad call leaves the package untouched---
    source_position = self.index(slide)
    target_position = source_position + 1 if index is None else index
    if target_position < 0 or target_position > len(self._sldIdLst):
        raise IndexError("slide index out of range")

    source_part = slide.part
    clone_part = source_part.duplicate()

    # ---relating the clone to the presentation part yields its rId---
    clone_rId = self.part.relate_to(clone_part, RT.SLIDE)
    self._sldIdLst.insert_sldId_at(clone_rId, target_position)

    # ---a source with notes gets a separate notes slide for the clone---
    if source_part.has_notes_slide:
        duplicate_notes_slide_for(source_part, clone_part)

    return clone_part.slide


class SlideLayout(_BaseSlide):
"""Slide layout object.
Expand Down
Loading
Loading