Skip to content

Commit 53fea8f

Browse files
authored
Merge pull request #45 from MHoroszowski/feature/modernization-phase4
feat: shape-tree ergonomics — iter_leaf_shapes, find_by_xpath, mapping access, selection-pane order (Modernization Phase 4)
2 parents a6cb787 + 8d6b37c commit 53fea8f

5 files changed

Lines changed: 510 additions & 11 deletions

File tree

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
Feature: Modernization Phase 4 — shape-tree ergonomics
2+
In order to traverse, look up, and inspect shapes ergonomically
3+
As a developer using python-pptx
4+
I need iter_leaf_shapes, mapping-like name access, find_by_xpath, and selection-pane ordering
5+
6+
7+
Scenario: Mapping-like name access on a slide's shapes
8+
Given a fresh slide with a title placeholder
9+
Then shapes["Title 1"] returns the title shape
10+
And "Title 1" is in shapes
11+
And "Bogus" is not in shapes
12+
13+
14+
Scenario: Mapping-like name access on a slide's placeholders
15+
Given a fresh slide with a title placeholder
16+
Then placeholders["Title 1"] returns the title placeholder
17+
And "Title 1" is in placeholders
18+
19+
20+
Scenario: shapes.keys() returns the list of shape names
21+
Given a fresh slide with a title placeholder
22+
Then shapes.keys() includes "Title 1"
23+
24+
25+
Scenario: in_selection_pane_order reverses XML order
26+
Given a fresh slide with a title placeholder
27+
Then shapes.in_selection_pane_order() reverses iteration order
28+
29+
30+
Scenario: iter_leaf_shapes yields top-level shapes when no groups present
31+
Given a fresh slide with a title placeholder
32+
Then iter_leaf_shapes() yields the same shapes as iteration
33+
34+
35+
Scenario: find_by_xpath returns a non-empty list for a known element
36+
Given a fresh slide with a title placeholder
37+
Then title.find_by_xpath(".//p:nvSpPr") has length 1
38+
39+
40+
Scenario: find_by_xpath returns empty list on no match
41+
Given a fresh slide with a title placeholder
42+
Then title.find_by_xpath(".//a:nope_no_match") is empty
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Gherkin steps for Modernization Phase 4 (issue #29) — shape-tree ergonomics."""
2+
3+
from __future__ import annotations
4+
5+
from behave import then
6+
7+
8+
# the "Given a fresh slide with a title placeholder" step is shared from
9+
# features/steps/modernization_phase2.py (Phase 2)
10+
11+
12+
# then ====================================================
13+
14+
15+
@then('shapes["Title 1"] returns the title shape')
def then_shapes_str_key_returns_title(context):
    """Check that string-key lookup on the slide's shapes yields the "Title 1" shape."""
    title_shape = context.slide.shapes["Title 1"]
    actual_name = title_shape.name
    # ---the observed name doubles as the failure message---
    assert actual_name == "Title 1", actual_name
19+
20+
21+
@then('"Title 1" is in shapes')
def then_title_in_shapes(context):
    """Check that the name "Title 1" is reported as a member of the slide's shapes."""
    shapes = context.slide.shapes
    assert "Title 1" in shapes
24+
25+
26+
@then('"Bogus" is not in shapes')
def then_bogus_not_in_shapes(context):
    """Check that an unknown name is not reported as a member of the slide's shapes."""
    shapes = context.slide.shapes
    assert "Bogus" not in shapes
29+
30+
31+
@then('placeholders["Title 1"] returns the title placeholder')
def then_placeholders_str_key(context):
    """Check that string-key lookup on the slide's placeholders yields "Title 1"."""
    title_ph = context.slide.placeholders["Title 1"]
    actual_name = title_ph.name
    # ---the observed name doubles as the failure message---
    assert actual_name == "Title 1", actual_name
35+
36+
37+
@then('"Title 1" is in placeholders')
def then_title_in_placeholders(context):
    """Check that the name "Title 1" is reported as a member of the placeholders."""
    placeholders = context.slide.placeholders
    assert "Title 1" in placeholders
40+
41+
42+
@then('shapes.keys() includes "Title 1"')
def then_shapes_keys_includes_title(context):
    """Check that `shapes.keys()` lists the name "Title 1"."""
    shape_names = context.slide.shapes.keys()
    assert "Title 1" in shape_names
45+
46+
47+
@then("shapes.in_selection_pane_order() reverses iteration order")
def then_selection_pane_reverses(context):
    """Check that selection-pane order is exactly the reverse of iteration order.

    Shapes are compared by name; both name lists are reported on failure.
    """
    shapes = context.slide.shapes
    xml_order = [shape.name for shape in shapes]
    sp_order = [shape.name for shape in shapes.in_selection_pane_order()]
    expected = xml_order[::-1]
    assert sp_order == expected, (sp_order, xml_order)
52+
53+
54+
@then("iter_leaf_shapes() yields the same shapes as iteration")
def then_iter_leaf_matches_iter(context):
    """Check that `iter_leaf_shapes()` and plain iteration give the same shape names.

    Both name lists are reported on failure.
    """
    shapes = context.slide.shapes
    leaves = [leaf.name for leaf in shapes.iter_leaf_shapes()]
    top = [shape.name for shape in shapes]
    assert leaves == top, (leaves, top)
59+
60+
61+
@then('title.find_by_xpath(".//p:nvSpPr") has length 1')
def then_xpath_match_length_1(context):
    """Check that the title shape has exactly one `p:nvSpPr` match."""
    title = context.slide.shapes.title
    match_count = len(title.find_by_xpath(".//p:nvSpPr"))
    # ---the observed count doubles as the failure message---
    assert match_count == 1, match_count
66+
67+
68+
@then('title.find_by_xpath(".//a:nope_no_match") is empty')
def then_xpath_empty(context):
    """Check that an XPath expression matching nothing yields an empty list."""
    title = context.slide.shapes.title
    matches = title.find_by_xpath(".//a:nope_no_match")
    assert matches == []

src/pptx/shapes/base.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,19 @@
44

55
from typing import TYPE_CHECKING, cast
66

7+
from lxml.etree import _Element # pyright: ignore[reportPrivateUsage]
8+
79
from pptx.action import ActionSetting
810
from pptx.dml.effect import ShadowFormat
911
from pptx.shared import ElementProxy
1012
from pptx.util import lazyproperty
1113

14+
# ---bound to the lxml base method so `find_by_xpath(..., namespaces=ns)` can
15+
# ---honor the caller's prefix map without going through the project's
16+
# ---`BaseOxmlElement.xpath` override (which auto-applies the project nsmap
17+
# ---and rejects `namespaces=` kwarg).
18+
_LXML_XPATH = _Element.xpath
19+
1220
if TYPE_CHECKING:
1321
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
1422
from pptx.oxml.shapes import ShapeElement
@@ -65,6 +73,27 @@ def element(self) -> ShapeElement:
6573
"""
6674
return self._element
6775

76+
def find_by_xpath(self, xpath: str, namespaces: "dict[str, str] | None" = None) -> list:
    """Evaluate `xpath` against this shape's element and return the result.

    This is a low-level escape hatch for callers who need direct XPath
    access to the shape's XML subtree. The value returned is whatever the
    underlying XPath call produces — typically a list of matching
    elements, empty when nothing matches.

    With `namespaces` left as |None|, the element's own ``xpath()`` method
    is called, which is the project wrapper that supplies the standard
    prefix map (``a:``, ``p:``, ``r:``, etc.). When a dict is given, it is
    passed as the prefix map to lxml's base ``xpath`` implementation
    instead, bypassing the project wrapper.

    Example::

        for t_elm in shape.find_by_xpath(".//a:t"):
            print(t_elm.text)
    """
    elm = self._element
    if namespaces is not None:
        # ---caller-supplied prefix map: go straight to lxml's base method---
        return _LXML_XPATH(elm, xpath, namespaces=namespaces)
    # ---default: the element's own xpath() applies the standard nsmap---
    return elm.xpath(xpath)
96+
6897
@property
6998
def has_chart(self) -> bool:
7099
"""|True| if this shape is a graphic frame containing a chart object.

src/pptx/shapes/shapetree.py

Lines changed: 123 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,28 @@ def __init__(self, spTree: CT_GroupShape, parent: ProvidesPart):
8585
self._spTree = spTree
8686
self._cached_max_shape_id = None
8787

88-
def __getitem__(self, idx: int) -> BaseShape:
89-
"""Return shape at `idx` in sequence, e.g. `shapes[2]`."""
88+
def __getitem__(self, key: int | str) -> BaseShape:
    """Return the shape selected by `key`, dispatching on the key's type.

    - An integer `key` is a sequence index into the member shapes in
      document order, e.g. ``shapes[2]``. |IndexError| is raised when
      it is out of range.
    - A string `key` is a shape name and is delegated to
      :meth:`by_name`, e.g. ``shapes["Title 1"]``. |KeyError| is raised
      when no shape has that name.

    A ``bool`` key raises |TypeError|. ``bool`` is a subclass of ``int``,
    so without this guard ``True``/``False`` would quietly index
    position 1/0, which is almost certainly not what the caller meant.

    Closes scanny/python-pptx#800.
    """
    # ---bool must be tested before int-style indexing can see it---
    if isinstance(key, bool):
        raise TypeError("shape key must be int or str, got bool")

    if isinstance(key, str):
        return self.by_name(key)

    member_elms = list(self._iter_member_elms())
    try:
        target_elm = member_elms[key]
    except IndexError:
        raise IndexError("shape index out of range")
    else:
        return self._shape_factory(target_elm)
@@ -125,6 +142,67 @@ def by_name(self, name: str) -> BaseShape:
125142
return shape
126143
raise KeyError("no shape named %r in this collection" % name)
127144

145+
def __contains__(self, key: object) -> bool:
    """Membership test: `"Title 1" in shapes` checks names.

    - String key: True when any shape in this collection has a matching
      ``.name`` (case-sensitive).
    - Integer key: True when ``0 <= key < len(self)`` — a sequence-style
      index range check. Negative indices are not counted here, even
      though `__getitem__` accepts them via list indexing.
    - ``bool`` key: always False. ``bool`` is a subclass of ``int``, and
      ``True``/``False`` used as an index is almost always a bug.
    - Any other object (e.g. a shape): True when some member is that
      object or compares equal to it. This is the test Python performed
      by iterating the collection before this method was defined, so
      ``shape in shapes`` keeps its earlier meaning.
    """
    if isinstance(key, bool):
        return False
    if isinstance(key, str):
        return any(shape.name == key for shape in self)
    if isinstance(key, int):
        return 0 <= key < len(self)
    # ---defining __contains__ disables Python's iteration-based fallback,
    # ---so reproduce it here (identity first, then equality) rather than
    # ---reporting every shape object as absent---
    return any(shape is key or shape == key for shape in self)
164+
165+
def keys(self) -> list[str]:
    """Return the ``.name`` of each shape, in iteration order.

    Provided as a mapping-style convenience. Shape names are not
    guaranteed to be unique (PowerPoint does not enforce uniqueness), so
    the same name can appear more than once in the result.
    """
    return [member.name for member in self]
172+
173+
def iter_leaf_shapes(self) -> Iterator[BaseShape]:
    """Generate every non-group shape reachable from this collection.

    Each `GroupShape` member is descended into recursively (through its
    own ``.shapes.iter_leaf_shapes()``) and is not itself yielded; only
    the shapes that are not groups come out, in the order they are
    encountered. Use ordinary ``for shape in shapes`` iteration when the
    group containers themselves are wanted.

    Closes scanny/python-pptx#435.
    """
    # ---imported here rather than at module level to avoid a circular import---
    from pptx.shapes.group import GroupShape

    for member in self:
        if not isinstance(member, GroupShape):
            yield member
            continue
        # ---a group contributes its leaves, never itself---
        yield from member.shapes.iter_leaf_shapes()
192+
193+
def in_selection_pane_order(self) -> tuple[BaseShape, ...]:
    """Return this collection's shapes in PowerPoint Selection Pane order.

    The Selection Pane lists the top-most shape first. The top-most shape
    is the last child in XML document order, so the result is simply
    ``tuple(self)`` reversed. The tuple is a snapshot taken at call time
    and does not track later changes to the collection.

    Closes scanny/python-pptx#532.
    """
    document_order = tuple(self)
    return document_order[::-1]
205+
128206
def clone_placeholder(self, placeholder: LayoutPlaceholder) -> None:
129207
"""Add a new placeholder shape based on `placeholder`."""
130208
sp = placeholder.element
@@ -859,22 +937,56 @@ def _shape_factory( # pyright: ignore[reportIncompatibleMethodOverride]
859937
class SlidePlaceholders(ParentedElementProxy):
860938
"""Collection of placeholder shapes on a slide.
861939
862-
Supports iteration, :func:`len`, and dictionary-style lookup on the `idx` value of the
863-
placeholders it contains.
940+
Supports iteration, :func:`len`, and dictionary-style lookup by both the
941+
`idx` value (int) and the placeholder ``.name`` (str).
864942
"""
865943

866944
_element: CT_GroupShape
867945

868-
def __getitem__(self, idx: int):
869-
"""Access placeholder shape having `idx`.
946+
def __getitem__(self, key: int | str):
    """Access placeholder shape by `idx` value (int) or `.name` (str).

    Note that while this looks like list access, an integer `key` is a
    dictionary key matched against each placeholder element's ``ph_idx``
    (NOT a sequence index). A string `key` is matched against each
    placeholder's ``.name``.

    Raises |KeyError| when no placeholder matches, whatever the key's
    type. Raises |TypeError| for a ``bool`` key: ``bool`` is a subclass
    of ``int``, so it would otherwise silently act as a
    ``ph_idx == 0/1`` lookup, almost certainly unintended.

    Closes scanny/python-pptx#800.
    """
    if isinstance(key, bool):
        raise TypeError("placeholder key must be int or str, got bool")
    if isinstance(key, str):
        for ph in self:
            if ph.name == key:
                return ph
        raise KeyError("no placeholder named %r in this collection" % key)
    for e in self._element.iter_ph_elms():
        if e.ph_idx == key:
            return SlideShapeFactory(e, self)
    # ---%r, not %d: same text for an int key, but an unsupported key such
    # ---as None still gets the KeyError (not a formatting TypeError) and a
    # ---float is reported untruncated---
    raise KeyError("no placeholder on this slide with idx == %r" % (key,))
970+
971+
def __contains__(self, key: object) -> bool:
    """Membership test: `"Title 1" in placeholders` checks names.

    - String key: True when any placeholder's ``.name`` matches.
    - Integer key: True when a placeholder element with that ``ph_idx``
      exists.
    - ``bool`` key: always False. ``bool`` is a subclass of ``int`` and
      would otherwise act as a ``ph_idx == 0/1`` test.
    - Any other object (e.g. a placeholder): True when some member is
      that object or compares equal to it. This is the test Python
      performed by iterating the collection before this method was
      defined, so ``placeholder in placeholders`` keeps its earlier
      meaning.
    """
    if isinstance(key, bool):
        return False
    if isinstance(key, str):
        return any(ph.name == key for ph in self)
    if isinstance(key, int):
        return any(e.ph_idx == key for e in self._element.iter_ph_elms())
    # ---defining __contains__ disables Python's iteration-based fallback,
    # ---so reproduce it here (identity first, then equality) rather than
    # ---reporting every placeholder object as absent---
    return any(ph is key or ph == key for ph in self)
986+
987+
def keys(self) -> list[str]:
    """Return the ``.name`` of each placeholder, in iteration order."""
    return [placeholder.name for placeholder in self]
878990

879991
def __iter__(self):
880992
"""Generate placeholder shapes in `idx` order."""

0 commit comments

Comments
 (0)