diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b14f6cd5..735edf76a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,21 +1,47 @@ name: ci +# Runs on pushes to master, develop, release/**, feature/** and ci/**, and on pull requests +# targeting master. Publishing is separate: publish.yml runs on GitHub Releases, not tag pushes. + on: - pull_request: - branches: [ master ] push: branches: - master - develop + - "release/**" + - "feature/**" + - "ci/**" + pull_request: + branches: + - master + workflow_dispatch: permissions: - contents: write + contents: read jobs: + lint: + name: Lint (ruff) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install ruff + run: python -m pip install --upgrade ruff + - name: ruff check + run: ruff check src tests + - name: ruff format check + run: ruff format --check src tests + continue-on-error: true # advisory until format pass is run repo-wide - build: + test: + name: Test (Python ${{ matrix.python-version }}) runs-on: ubuntu-latest strategy: + fail-fast: false matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: @@ -26,11 +52,35 @@ jobs: python-version: ${{ matrix.python-version }} - name: Display Python version run: python -c "import sys; print(sys.version)" - - name: Install test dependencies + - name: Install package and test deps run: | - pip install . - pip install -r requirements-test.txt - - name: Test with pytest - run: pytest --cov=pptx --cov-report term-missing tests - - name: Acceptance tests with behave + python -m pip install --upgrade pip + python -m pip install -e .
+ python -m pip install -r requirements-test.txt + - name: Unit + integration tests (pytest) + run: pytest --cov=pptx --cov-report=term-missing tests + - name: Acceptance tests (behave) run: behave --stop + + build-check: + name: Build sdist and wheel (smoke) + runs-on: ubuntu-latest + needs: test + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install build tooling + run: python -m pip install --upgrade build twine + - name: Build distributions + run: python -m build + - name: Verify metadata renders + run: python -m twine check dist/* + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: dist-${{ github.sha }} + path: dist/ + retention-days: 7 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..2d4476e1c --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,59 @@ +name: publish + +# Builds and publishes python-pptx-extended to PyPI using PyPI Trusted +# Publishing (OIDC). No long-lived API token is stored in repo secrets — the +# workflow's identity is verified by PyPI against the configured Trusted +# Publisher (see one-time setup in the PR description). +# +# Triggers: +# - GitHub Release published (recommended path: cut a release in the GH UI) +# - Manual workflow_dispatch (override / re-run) +# +# Tag pushes alone do not trigger this; create a Release pointing at the tag. 
+ +on: + release: + types: [published] + workflow_dispatch: + +jobs: + build: + name: Build sdist and wheel + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install build tooling + run: python -m pip install --upgrade build + - name: Build distributions + run: python -m build + - name: Verify metadata renders + run: | + python -m pip install --upgrade twine + python -m twine check dist/* + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + publish-pypi: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/project/python-pptx-extended/ + permissions: + id-token: write + steps: + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/HISTORY.rst b/HISTORY.rst index e1c4e8faf..d56135575 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,42 @@ Release History --------------- +1.2.0 (2026-05-05) +++++++++++++++++++ + +Adds first-class support for OOXML customXml — the mechanism Office.js, +SharePoint, and VSTO add-ins use to embed structured application data in +``.pptx`` files. See ``docs/user/custom-xml.rst`` for the user guide and +``docs/dev/analysis/customxml.rst`` for the OOXML analysis. + +- feature: ``Presentation.custom_properties`` — Mapping wrapper over + ``/docProps/custom.xml`` (Custom Document Properties; visible in + PowerPoint's *File → Properties → Advanced* UI). Type-dispatched + ``__setitem__`` plus explicit ``set_string`` / ``set_int`` / ``set_float`` / + ``set_bool`` / ``set_datetime`` setters when Python type inference does the + wrong thing. +- feature: ``Presentation.custom_xml_parts`` — Sequence wrapper over the + package's customXml data parts. 
``add(xml, *, name=, datastoreItem_id=, + schema_refs=, scope=)`` supports both presentation-scoped (Office.js + default) and package-scoped (VSTO / SharePoint) topologies. Lookup via + index, partname tail, ``by_guid(...)``, or ``by_name(...)``. +- feature: ``CustomXmlParts.add_string_blob(name, content, mime_hint=, + encoding=)`` — convenience for the common "embed a string verbatim and + read it back" case (e.g. round-trip a markdown source document). +- feature: round-trip safety with files written by other tools — PPTX files + containing customXml parts authored by SharePoint, Office.js, or VSTO load + and save without losing their content. + +1.1.0 (2026-05-01) +++++++++++++++++++ + +- Fork of python-pptx 1.0.2 published as ``python-pptx-extended``. +- feature: full shadow effect API on ``ShadowFormat`` +- feature: bullet and numbered list paragraph formatting +- feature: per-edge table cell borders +- feature: ``cap_style`` and ``join_style`` properties on ``LineFormat`` +- feature: line-end shape types + 1.0.2 (2024-08-07) ++++++++++++++++++ diff --git a/Plans/customxml-implementation-plan.md b/Plans/customxml-implementation-plan.md new file mode 100644 index 000000000..987045eca --- /dev/null +++ b/Plans/customxml-implementation-plan.md @@ -0,0 +1,816 @@ +# Plan: customXml part manipulation in `python-pptx-extended` + +> **Status:** proposal — awaiting principal approval before implementation begins. +> **Scope:** add first-class read/write support for the two OOXML mechanisms that +> let an application embed structured data in a `.pptx`: +> +> 1. **Custom document properties** — `/docProps/custom.xml` (visible in PowerPoint UI under *File → Properties → Advanced*). +> 2. **CustomXml data parts** — `/customXml/itemN.xml` + `/customXml/itemPropsN.xml` (hidden from end users; the mechanism Office.js, SharePoint, and VSTO use). 
+> +> The first consumer is a CLI that round-trips a markdown source document, but the +> public API is general-purpose: provenance metadata, AI generation markers, +> template parameters, application-specific configuration, etc. + +--- + +## 1. Context + +### Why this fork + +Mainline `scanny/python-pptx` v0.4.1 made the loader *tolerate* customXml parts +(parts no longer trip the importer when present), but never exposed an API to +read, mutate, or create them. Issues +[#286](https://github.com/scanny/python-pptx/issues/286) (custom doc properties) +and [#578](https://github.com/scanny/python-pptx/issues/578) (custom tags) have +been open and unaddressed for years. Other forks (`python-pptx-ng`, +`python-pptx-fix`, `python-pptx-fork`) inherit the same gap. + +The pattern we are porting comes from +[`python-openxml/python-docx-oss`](https://github.com/python-openxml/python-docx-oss), +which solved the equivalent problem for `.docx` (`document.custom_properties`, +`document.part.custom_xml_parts[i].add_item(...)`). We adapt that surface to +PresentationML's relationship topology — most importantly, customXml data parts +must hang off `ppt/presentation.xml.rels` (presentation-scoped), not the package +root, or Office.js will not enumerate them +([MS Q&A](https://learn.microsoft.com/en-us/answers/questions/5586825/how-to-add-a-proper-customxml-to-a-powerpoint-pres)). 
+ +### What the existing code already gives us for free + +A short codebase survey before signature design saved a lot of plumbing work: + +| Concern | Already in the fork | Where | +|---|---|---| +| Content-type constants | `CT.OFC_CUSTOM_PROPERTIES`, `CT.OFC_CUSTOM_XML_PROPERTIES`, `CT.XML` | `src/pptx/opc/constants.py:33–34, 170` | +| Relationship-type constants | `RT.CUSTOM_PROPERTIES`, `RT.CUSTOM_XML`, `RT.CUSTOM_XML_PROPS` | `src/pptx/opc/constants.py:220–229` | +| Auto-derived `[Content_Types].xml` | `_ContentTypesItem._defaults_and_overrides` reads `part.content_type` for every part; `xml` extension defaults to `application/xml` so `customXml/itemN.xml` lands under the default with no extra wiring | `src/pptx/opc/serialized.py:280–296` | +| Part-class registration | `PartFactory.part_type_for.update({...})` at module load | `src/pptx/__init__.py:35–69` | +| Pattern for property-style XML parts | `CorePropertiesPart` + `CT_CoreProperties` — a sibling pair we can copy | `src/pptx/parts/coreprops.py`, `src/pptx/oxml/coreprops.py` | +| Package-root vs. presentation-scoped relating | `package.relate_to(part, RT.X)` writes `/_rels/.rels`; `presentation_part.relate_to(part, RT.X)` writes `/ppt/_rels/presentation.xml.rels` | `src/pptx/opc/package.py:41–51, 357–361` | +| Lazy-load with graceful re-use | `lazyproperty` + `try part_related_by(...) / except KeyError: create-and-relate` | `src/pptx/package.py:19–30` (CoreProperties pattern) | +| `xmlchemy` machinery | `BaseOxmlElement`, `ZeroOrOne`, `ZeroOrMore`, `OptionalAttribute`, `RequiredAttribute`, `register_element_cls` | `src/pptx/oxml/xmlchemy.py`, `src/pptx/oxml/__init__.py` | + +**So no changes to constants, content-type registration, or the package writer +are required.** The work is: add new oxml classes, two new part subclasses, two +new collection wrappers, hang two properties off `Presentation`, and register +two content-types in `__init__.py`. + +--- + +## 2. 
Public API design + +> All examples assume `prs = Presentation("input.pptx")`. `Presentation` is the +> existing `pptx.presentation.Presentation` class. + +### 2.1 `Presentation.custom_properties` — typed dict-like + +Mirrors the docx-oss `CustomProperties` API. Each property is a `<property>` +element under `/docProps/custom.xml`; values are typed via the `vt:` namespace +(`lpwstr`, `i4`, `r8`, `bool`, `filetime`). + +```python +class CustomProperties(Mapping[str, "CustomPropertyValue"]): + """Read/write Custom document properties (visible in PowerPoint UI).""" + + def __getitem__(self, name: str) -> str | int | float | bool | datetime: ... + def __setitem__(self, name: str, value: str | int | float | bool | datetime) -> None: ... + def __delitem__(self, name: str) -> None: ... + def __contains__(self, name: object) -> bool: ... + def __iter__(self) -> Iterator[str]: ... + def __len__(self) -> int: ... + + def get(self, name: str, default=None): ... + def keys(self) -> KeysView[str]: ... + def items(self) -> ItemsView[str, "CustomPropertyValue"]: ... + def values(self) -> ValuesView["CustomPropertyValue"]: ... + + # Explicit-typed setters when the dispatch by Python type is wrong + def set_string(self, name: str, value: str) -> None: ... + def set_int(self, name: str, value: int) -> None: ... + def set_float(self, name: str, value: float) -> None: ... + def set_bool(self, name: str, value: bool) -> None: ... + def set_datetime(self, name: str, value: datetime) -> None: ...
+``` + +```python +prs = Presentation("input.pptx") +prs.custom_properties["Source"] = "deck-builder-cli@1.4.2" +prs.custom_properties["GeneratedAt"] = datetime.now(timezone.utc) +prs.custom_properties["BuildNumber"] = 42 +prs.custom_properties.set_string("FreeformNotes", "anything goes here") +del prs.custom_properties["Stale"] +prs.save("output.pptx") +``` + +**Type dispatch by Python type at `__setitem__`:** + +| Python type | `vt:` element | +|---|---| +| `str` | `vt:lpwstr` | +| `bool` (checked **before** `int`) | `vt:bool` | +| `int` | `vt:i4` | +| `float` | `vt:r8` | +| `datetime.datetime` | `vt:filetime` | + +Anything else raises `TypeError`. The explicit `set_*` methods exist for the +case where the caller wants `lpwstr` *string* representations of numbers, or +where future types are added (`vt:lpstr`, `vt:i8`, etc.). + +**`fmtid` and `pid`:** every `<property>` element requires +`fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"` (the well-known Office FMTID) +and a `pid` ≥ 2 unique within the part. The collection auto-assigns `pid` (next +free integer ≥ 2). Callers never see `pid`. + +### 2.2 `Presentation.custom_xml_parts` — collection of arbitrary-XML parts + +Mirrors docx-oss `document.part.custom_xml_parts`. Each entry is a +`CustomXmlPart` paired with a `CustomXmlPropertiesPart` (its `itemPropsN.xml` +sibling carrying the `datastoreItem` GUID and any `schemaRefs`). + +```python +class CustomXmlParts(Sequence["CustomXmlPart"]): + """Collection of customXml data parts attached to the presentation.""" + + def __getitem__(self, key: int | str) -> "CustomXmlPart": + """Index by integer position OR by part name (e.g. 'item3.xml'). + + Use `.by_guid(...)` for datastoreItem-id lookup. + """ + + def __iter__(self) -> Iterator["CustomXmlPart"]: ... + def __len__(self) -> int: ... + + def by_guid(self, guid: str) -> "CustomXmlPart | None": + """Lookup by datastoreItem id (the GUID in itemPropsN.xml).
Match is + case-insensitive and curly-brace-tolerant.""" + + def by_name(self, name: str) -> "CustomXmlPart | None": + """Lookup by application-assigned name. Names live in custom_properties + under a reserved `_pptx_customxml_name_{guid}` key — see §3.4.""" + + def add( + self, + xml: bytes | str | "lxml.etree._Element", + *, + name: str | None = None, + datastoreItem_id: str | None = None, + schema_refs: Iterable[str] | None = None, + scope: Literal["presentation", "package"] = "presentation", + ) -> "CustomXmlPart": + """Add a new customXml part with the given XML payload. + + Parameters + ---------- + xml + Raw XML — bytes, str, or an lxml `_Element`. Must be well-formed + XML; the caller owns the root element name and namespaces. Stored + verbatim (modulo any normalization lxml does on parse). + name + Optional application-assigned name. Stored as a custom document + property under `_pptx_customxml_name_{guid}`. See §3.4 + for why we do not use a `name` attribute on `customXmlPart`. + datastoreItem_id + Optional GUID. If omitted, a new `uuid4()` is generated and wrapped + in curly braces ("{...}") to match Office's format. + schema_refs + Optional iterable of schema namespace URIs that this customXml part + claims to conform to. Written as `<ds:schemaRef ds:uri="..."/>` + children of `<ds:schemaRefs>` in itemProps. + scope + "presentation" (default) writes the relationship into + `ppt/_rels/presentation.xml.rels` — the topology Office.js + enumerates. "package" writes to `_rels/.rels` to match VSTO / + SharePoint patterns. The two are not exchangeable round-trip + (PowerPoint preserves the topology it was written with). + + Returns + ------- + The new `CustomXmlPart`. Already attached; nothing else to do before + `prs.save()`. + """ + + def remove(self, part: "CustomXmlPart | int | str") -> None: + """Remove the part (and its paired CustomXmlPropertiesPart) from the + package. Drops the relationship from whichever source (presentation or + package) currently owns it.
Idempotent if already removed.""" +``` + +```python +class CustomXmlPart: + """A single customXml/itemN.xml + customXml/itemPropsN.xml pair.""" + + @property + def name(self) -> str | None: + """Application-assigned name from custom_properties, or None.""" + + @property + def datastoreItem_id(self) -> str: + """GUID identifying the part across edits (e.g. '{1A2B...}').""" + + @datastoreItem_id.setter + def datastoreItem_id(self, value: str) -> None: ... + + @property + def schema_refs(self) -> tuple[str, ...]: + """Tuple of `ds:schemaRef ds:uri` values from itemProps.""" + + @schema_refs.setter + def schema_refs(self, value: Iterable[str]) -> None: ... + + @property + def scope(self) -> Literal["presentation", "package"]: + """Where this part's relationship is currently rooted (read-only; + change via remove + re-add).""" + + @property + def partname(self) -> str: + """Package URI, e.g. '/customXml/item3.xml'.""" + + @property + def element(self) -> "lxml.etree._Element": + """Live root element of the customXml payload. Mutating it mutates the + part. For replace-whole-payload semantics, use `.replace_xml(...)`.""" + + @property + def blob(self) -> bytes: + """Serialized bytes of the customXml payload (with XML declaration).""" + + def replace_xml(self, xml: bytes | str | "lxml.etree._Element") -> None: + """Replace the entire payload with `xml`. The root element is + replaced, not merged. Preserves datastoreItem_id and schema_refs (those + live in the sibling itemProps part).""" + + # docx-oss compatibility shim — only present if we adopt it (see §8 Q1) + def add_item(self, tag: str, text: str = "", **attrs: str) -> "lxml.etree._Element": + """Append a child element `<tag>text</tag>` with attributes. Returns the + appended element.
Convenience for the common "flat list of items" + shape; for arbitrary structure use `.element` directly.""" +``` + +```python +import uuid +prs = Presentation("input.pptx") + +# General case — arbitrary XML +part = prs.custom_xml_parts.add( + b"""<?xml version="1.0" encoding="UTF-8"?> + <provenance xmlns="urn:my-app:provenance"> + <source>deck-builder-cli</source> + <generatedAt>2026-05-05T14:00:00Z</generatedAt> + </provenance>""", + name="provenance", + schema_refs=["urn:my-app:provenance"], +) +print(part.datastoreItem_id) # auto-assigned GUID + +# Lookup +same = prs.custom_xml_parts.by_name("provenance") +assert same is part +also_same = prs.custom_xml_parts.by_guid(part.datastoreItem_id) + +# Mutate +same.element.find("{urn:my-app:provenance}source").text = "deck-builder-cli@1.4.3" + +prs.save("output.pptx") +``` + +### 2.3 String-blob helper — the primary use case + +Most callers want "stash this string verbatim, give it back to me on read." +Wrapping it in a one-element XML envelope keeps it valid OOXML and lets the +mime hint round-trip: + +```python +def add_string_blob( + self, + name: str, + content: str, + *, + mime_hint: str | None = None, + encoding: Literal["text", "base64"] = "text", + scope: Literal["presentation", "package"] = "presentation", +) -> "CustomXmlPart": + """Embed a string payload as a customXml part. + + Wraps `content` in: + <blob xmlns="urn:python-pptx:blob" mime="..." encoding="...">...</blob> + + For binary or non-XML-safe text, set encoding="base64" and pass already- + encoded content (the helper does NOT auto-base64; the caller is + responsible). Round-trip: read with `.element.text` or via the helper + `read_string_blob(name)`.""" + +def read_string_blob(self, name: str) -> str | None: + """Return content of the blob part with `name`, or None if not present. + If encoding='base64', returns the still-encoded string — the caller + decodes.""" +``` + +The `urn:python-pptx:blob` envelope namespace is reserved for this fork's +helpers. Callers using `.add(...)` directly are free to use any namespace they +want.
+ +### 2.4 Property accessors on `Presentation` + +Two new properties on `pptx.presentation.Presentation`: + +```python +@property +def custom_properties(self) -> CustomProperties: + """CustomProperties instance for /docProps/custom.xml. Created on first + access if the part does not yet exist (consistent with .core_properties). + """ + +@property +def custom_xml_parts(self) -> CustomXmlParts: + """Collection of customXml data parts. Always returns the same collection + instance for a given Presentation.""" +``` + +Both delegate through `self.part` to the `PresentationPart`, which owns the +lazy-loaded helpers — same pattern as `core_properties`. + +--- + +## 3. Internal architecture + +### 3.1 New files + +| Path | Purpose | +|---|---| +| `src/pptx/parts/custom_properties.py` | `CustomPropertiesPart(XmlPart)` — `/docProps/custom.xml` | +| `src/pptx/parts/custom_xml.py` | `CustomXmlPart(XmlPart)`, `CustomXmlPropertiesPart(XmlPart)` | +| `src/pptx/oxml/custom_properties.py` | `CT_CustomProperties`, `CT_Property`, value-type element classes | +| `src/pptx/oxml/custom_xml.py` | `CT_DatastoreItem`, `CT_DatastoreSchemaRef` | +| `src/pptx/custom_properties.py` | `CustomProperties` (Mapping wrapper) | +| `src/pptx/custom_xml.py` | `CustomXmlParts` (Sequence wrapper), `CustomXmlPart` user-facing facade | + +Layering rationale (matches the rest of the codebase): + +- `oxml/*` — pure XML element classes; no relationship logic; xmlchemy types only. +- `parts/*` — `XmlPart` subclasses; own a single `_element`; `lazyproperty` + helpers but no end-user collections. +- `custom_properties.py`, `custom_xml.py` (top-level) — user-facing wrappers + (Mapping/Sequence) that the principal hangs off `Presentation`. Mirrors how + `pptx/slide.py` (`Slides`, `SlideMasters`) lives next to `pptx/presentation.py`. 
+ +### 3.2 Modified files + +| Path | Change | Rationale | +|---|---|---| +| `src/pptx/__init__.py` | Add three rows to `content_type_to_part_class_map` (CT.OFC_CUSTOM_PROPERTIES, CT.OFC_CUSTOM_XML_PROPERTIES, and CT.XML → CustomXmlPart **only when partname matches `/customXml/item*.xml`**, see §3.6) | Register part subclasses with the factory | +| `src/pptx/presentation.py` | Add `custom_properties` and `custom_xml_parts` properties | User-facing surface | +| `src/pptx/parts/presentation.py` | Add `custom_properties` lazyproperty, `custom_xml_parts` lazyproperty, helper for "find or create" the parts under the right relationship scope | Where the part-graph wiring lives | +| `src/pptx/package.py` | Add `custom_properties` lazyproperty (mirrors `core_properties`) — package-root scope is correct for `/docProps/custom.xml` per OOXML convention | Package-root relating | +| `src/pptx/oxml/__init__.py` | `register_element_cls(...)` calls for the new oxml classes | Standard registration | +| `src/pptx/types.py` | (Optional) `CustomPropertyValue` type alias for the union | Keep public `__init__.py` clean | +| `pyproject.toml` / `HISTORY.rst` | Bump minor version, log change | Release hygiene | + +**No changes** to `src/pptx/opc/constants.py`, `src/pptx/opc/serialized.py`, +`src/pptx/opc/package.py`, or `src/pptx/opc/spec.py`. The constants and content +types we need are already there; the writer auto-derives content types per +part. 
+ +### 3.3 Content type and relationship plumbing — no new constants + +Verified by reading `src/pptx/opc/constants.py:33–34, 170, 220–229`: + +```text +CT.OFC_CUSTOM_PROPERTIES = "application/vnd.openxmlformats-officedocument.custom-properties+xml" +CT.OFC_CUSTOM_XML_PROPERTIES = "application/vnd.openxmlformats-officedocument.customXmlProperties+xml" +CT.XML = "application/xml" +RT.CUSTOM_PROPERTIES = ".../custom-properties" +RT.CUSTOM_XML = ".../customXml" +RT.CUSTOM_XML_PROPS = ".../customXmlProps" +``` + +`_ContentTypesItem._defaults_and_overrides` (`opc/serialized.py:280–296`) reads +each part's `.content_type` and emits Default-or-Override entries automatically. +Since the `xml` extension already maps to `application/xml` in the default dict, +`/customXml/itemN.xml` (content_type `application/xml`) needs no Override +(Office writes the same way). `/customXml/itemPropsN.xml` becomes an Override +because its content type differs from `application/xml`. `/docProps/custom.xml` +becomes an Override (custom-properties+xml). + +### 3.4 Custom-name storage decision + +The OOXML spec does **not** define a "name" attribute on a customXml part. +docx-oss's `add_item` stores tags as XML elements; lookup by name there is by +the *element tag*, not the part. We need part-level naming for +`custom_xml_parts.by_name("provenance")`. + +**Two options:** + +- **(Chosen, default plan)** Store names as a custom document property keyed by + the part's `datastoreItem_id`: `_pptx_customxml_name_{guid}` → `name`. + Lossless, round-trips through PowerPoint, no schema invention. Cost: every + `add(name=...)` also touches `/docProps/custom.xml`. +- (Rejected) Add a `` child to `itemProps` with a custom + attribute. Office tolerates it but other tools may strip it; not portable. + +**Open question Q3 in §8** — confirm the chosen approach before coding. 
+ +### 3.5 Relationship topology — default and override + +| Part | Default scope | Source rels file | Override flag | Why | +|---|---|---|---|---| +| `CustomPropertiesPart` (`/docProps/custom.xml`) | package-root | `/_rels/.rels` | none | Office writes it here; sibling of `core.xml` | +| `CustomXmlPart` (`/customXml/itemN.xml`) | presentation-scoped | `/ppt/_rels/presentation.xml.rels` | `scope="package"` on `add(...)` | Office.js / PowerPoint UI only enumerate presentation-scoped customXml | +| `CustomXmlPropertiesPart` (`/customXml/itemPropsN.xml`) | from its CustomXmlPart | `/customXml/_rels/itemN.xml.rels` (always) | n/a | Always a child of the data part | + +The `scope` parameter on `CustomXmlParts.add` is the override hatch. Round-trip +test fixtures in §5.3 cover both topologies. + +### 3.6 PartFactory ambiguity around `application/xml` + +`PartFactory` keys on `content_type` alone. But `/customXml/itemN.xml` has +content_type `application/xml`, which is also the catch-all for unrelated XML +parts. Two options: + +- **(Chosen)** Register `CT.XML → Part` (the base class — the existing default + behavior) and **resolve `CustomXmlPart` by partname pattern** at the + `PresentationPart.custom_xml_parts` level: enumerate `RT.CUSTOM_XML` + relationships, wrap each `target_part` in a `CustomXmlPart` facade *if not + already*. The facade carries the `_element` reference and writes through. No + new factory ambiguity. +- (Rejected) Subclass `Part` and re-resolve at load time. Risks promoting + unrelated `application/xml` parts to `CustomXmlPart` and breaking unrelated + XML-typed parts in third-party PPTX files. + +Note: `CustomXmlPropertiesPart` has its own dedicated content type +(`OFC_CUSTOM_XML_PROPERTIES`) so it registers normally with the factory; only +the data part is ambiguous. + +### 3.7 Loading existing customXml parts + +`OpcPackage._load` already walks every `.rels` file and constructs a `Part` for +every targeted partname (`opc/package.py:240–278`). 
With `CT.OFC_CUSTOM_XML_PROPERTIES` +mapped to `CustomXmlPropertiesPart` and `CT.OFC_CUSTOM_PROPERTIES` mapped to +`CustomPropertiesPart` in `__init__.py`, those load automatically. The data +part loads as a base `Part` (or `XmlPart` if we pre-register `CT.XML` to +`XmlPart` — TBD; see Q4). The `CustomXmlParts` collection finds them by walking +`RT.CUSTOM_XML` relationships from both the package and the presentation part. + +This means files saved by SharePoint, Office.js, or VSTO will round-trip +without code changes — we only need to *enumerate* both relationship sources, +which §3.5 already accounts for. + +--- + +## 4. Compatibility & migration + +### 4.1 Backward compatibility + +- **No public API removed or renamed.** Two new properties on `Presentation`, + one new property on `Package`, and six new module files (§3.1). +- **No change to `[Content_Types].xml` for files that do not use the new + features.** A presentation produced by code that never touches + `prs.custom_properties` or `prs.custom_xml_parts` writes byte-equivalent + output (modulo any unrelated changes). +- **PPTX files containing customXml parts written by other tools** load today + thanks to mainline's v0.4.1 hotfix; this PR just makes them visible. No + loader regressions expected — verified by the fixture matrix in §5.3. + +### 4.2 Consistency with existing property APIs + +`custom_properties` is intentionally shaped to mirror `core_properties`: + +| Aspect | `core_properties` | `custom_properties` | +|---|---|---| +| Lazy-create on first access | yes | yes | +| Surface on | `Presentation` and `Package` | `Presentation` and `Package` | +| Underlying part subclass | `CorePropertiesPart` | `CustomPropertiesPart` | +| Per-element setters/getters | typed properties | dict-like, type-dispatched | + +The dict-like shape diverges because custom properties are user-keyed, not a +fixed Dublin Core vocabulary. This is the same divergence docx-oss made.
+ +### 4.3 Slide / shape scope — explicitly deferred + +PresentationML has a third mechanism: per-slide and per-shape custom data via +`<p:custDataLst><p:tags r:id="..."/></p:custDataLst>`, where each `tag` +relationship targets a `tags+xml` part (`CT.PML_TAGS`). This is the mechanism +issue [#578](https://github.com/scanny/python-pptx/issues/578) asks for, and +what `singerla/pptx-automizer` exposes. + +This PR does **not** add per-slide or per-shape tag APIs. Reasons: + +1. The first consumer (markdown round-trip) wants presentation-scoped data, not + per-slide. +2. Per-shape `custDataLst` is plumbed differently (slide-rels, not + presentation-rels) and deserves its own PR with its own API surface. +3. Keeping this PR small reduces review surface and lets the + presentation-scoped code stabilize first. + +A follow-up PR (referenced in §6) will add `Slide.custom_xml_parts` / +`Shape.custom_xml_parts` once this lands. + +--- + +## 5. Testing strategy + +### 5.1 Unit tests — oxml classes + +**Pattern:** copy `tests/oxml/test_*.py` style. Pure XML in / XML out, no I/O. + +| File | Coverage | +|---|---| +| `tests/oxml/test_custom_properties.py` | `CT_CustomProperties` (root), `CT_Property` (each `<property>`), value-type elements (`vt:lpwstr`, `vt:i4`, `vt:bool`, `vt:filetime`, `vt:r8`). Round-trip parse → mutate → serialize. | +| `tests/oxml/test_custom_xml.py` | `CT_DatastoreItem` and `CT_DatastoreSchemaRef`. Verify `itemID` GUID format, schema-ref add/remove. | + +### 5.2 Unit tests — parts and collection wrappers + +| File | Coverage | +|---|---| +| `tests/parts/test_custom_properties.py` | `CustomPropertiesPart.default(...)`, getter/setter type dispatch, deletion, pid auto-assignment. Uses synthetic XML fixtures the way `tests/parts/test_coreprops.py:1–198` does. | +| `tests/parts/test_custom_xml.py` | `CustomXmlPart.replace_xml(...)`, `datastoreItem_id` round-trip, `schema_refs` add/remove, paired itemProps part lifecycle (add data → itemProps auto-created; remove data → itemProps removed).
| +| `tests/test_custom_properties.py` | `CustomProperties` Mapping protocol — `__getitem__`, `__setitem__`, `__delitem__`, `__contains__`, `__iter__`, `__len__`, type dispatch, `set_string`/etc., raises on unknown type. | +| `tests/test_custom_xml.py` | `CustomXmlParts` — `add(...)`, `remove(...)`, `by_name`, `by_guid`, indexing, scope=package vs presentation, name-storage in custom_properties. | + +### 5.3 Integration tests — full-package round-trip + +Place fixtures under `tests/test_files/customxml/` (new directory). Each fixture +is a real `.pptx` from a different ecosystem; round-trip = open + save + open + +diff payload. + +| Fixture | Origin | What it proves | +|---|---|---| +| `sharepoint-saved.pptx` | A presentation saved through SharePoint with VSTO-injected customXml at *package* scope | Loader handles package-root `RT.CUSTOM_XML` and we round-trip without dropping it | +| `officejs-added.pptx` | Office.js `addCustomXmlPart` output (presentation scope) | The "happy path" — Office.js semantics | +| `vsto-document-toolkit.pptx` | A VSTO-tooled deck with `ds:itemID` schema refs | `schema_refs` survive | +| `manual-multipart.pptx` | Hand-crafted with two customXml items + custom properties | N>1 handling | +| `our-output.pptx` | Generated by the test itself using the new API | Sanity check | + +Tests: + +1. `test_round_trip_preserves_payload` — open, save, re-open; assert + `custom_xml_parts[i].blob == original_blob` byte-for-byte (modulo lxml + re-serialization normalization, which is deterministic). +2. `test_round_trip_preserves_topology` — assert package-scope fixtures still + relate from package root after save; presentation-scope fixtures still relate + from presentation rels after save. +3. `test_load_with_no_customxml_unchanged` — open a PPTX with no customXml, + touch nothing, save; assert byte-equivalent (or content-types/rels are at + least set-equal — see Q5). +4. 
`test_core_properties_unaffected` — open a PPTX, set both + `core_properties.author` and `custom_properties["foo"]`; save; re-open; + assert both round-trip. + +### 5.4 Manual verification — PowerPoint UI + +The integration test plan does not — and cannot — verify that PowerPoint itself +considers the output legal. Add a manual checklist to the PR description: + +- [ ] Open `our-output.pptx` in PowerPoint 365 (Mac and Windows). No repair + prompt. +- [ ] *File → Properties → Advanced* shows the custom properties. +- [ ] Open in LibreOffice. Document the behavior (LibreOffice preserves + package-root customXml but historically strips presentation-scoped data + parts). +- [ ] Open in OnlyOffice / DocumentServer. Document. (See + [ONLYOFFICE/DocumentServer#1564](https://github.com/ONLYOFFICE/DocumentServer/issues/1564) + for known gaps.) + +The user guide page (§6.1) records what we observed so users have realistic +expectations. + +### 5.5 Coverage target + +Match the project's existing standard (≥95% line coverage on new modules per +`pyproject.toml`). Run with `tox -e py311` (existing tox config). + +--- + +## 6. Documentation + +### 6.1 User guide page — `docs/user/custom-xml.rst` + +Style: match the other `docs/user/*.rst` pages (`presentations.rst`, +`notes.rst`). Sections: + +1. Overview — when to use custom doc properties vs. customXml data parts. +2. Reading and writing custom document properties (with the full type table). +3. Reading and writing customXml data parts (with the string-blob example *and* + the arbitrary-XML example). +4. Round-trip caveats — what PowerPoint preserves, what LibreOffice / OnlyOffice + may strip. (Reference §5.4 manual matrix.) +5. Choosing the relationship scope (default vs. `scope="package"` and why). + +### 6.2 Dev analysis page — `docs/dev/analysis/customxml.rst` + +Match the existing `docs/dev/analysis/*` style (one analysis per OOXML feature, +ECMA-376 references, sample XML, schema diagrams in ASCII). Sections: + +1. 
ECMA-376 references — Part 1 §15.2.4 (Custom XML Data Storage Part) and + §15.2.12 (Custom File Properties Part). +2. Sample XML for `/docProps/custom.xml`, `/customXml/item1.xml`, + `/customXml/itemProps1.xml`. +3. Relationship topology diagram — package vs. presentation scope. +4. Why the well-known FMTID is fixed. +5. The `application/xml` content-type ambiguity and how `python-pptx-extended` + resolves it (§3.6). +6. The `_pptx_customxml_name_` storage convention (§3.4). + +### 6.3 API reference + +Add `docs/api/custom_properties.rst` and `docs/api/custom_xml.rst` with the +auto-doc directives. Update `docs/api/presentation.rst` to mention the two new +properties. + +### 6.4 HISTORY.rst + +A 1.2.0 entry summarizing the feature (the fork's version-bump pattern from +`Plans/review-the-guide-at-swift-kahn.md`). + +--- + +## 7. Phased implementation order + +The phases below assume a dedicated `feature/customxml` branch (matches the +`feature/*` branch convention in `git log`). + +### Phase 1 — oxml foundation (no public API) + +- `src/pptx/oxml/custom_properties.py` — `CT_CustomProperties`, `CT_Property`, + value-type elements. +- `src/pptx/oxml/custom_xml.py` — `CT_DatastoreItem`, `CT_DatastoreSchemaRef`. +- `src/pptx/oxml/__init__.py` — register element classes. +- Tests: `tests/oxml/test_custom_properties.py`, `tests/oxml/test_custom_xml.py`. + +**Deliverable:** new oxml classes parse and serialize round-trip. No +behavior change for callers. + +### Phase 2 — Part subclasses + +- `src/pptx/parts/custom_properties.py` — `CustomPropertiesPart` with the + `default` factory and per-element accessors. +- `src/pptx/parts/custom_xml.py` — `CustomXmlPart`, `CustomXmlPropertiesPart`, + with paired-creation logic (`new_pair(package, ...)`). +- `src/pptx/__init__.py` — register the new content types in + `content_type_to_part_class_map`. +- Tests: `tests/parts/test_custom_properties.py`, `tests/parts/test_custom_xml.py`. 
+ +**Deliverable:** parts load and save correctly when present in a PPTX file. +Still no Presentation-level surface. + +### Phase 3 — Public collections and Presentation hooks + +- `src/pptx/custom_properties.py` — `CustomProperties` mapping wrapper. +- `src/pptx/custom_xml.py` — `CustomXmlParts` sequence wrapper. +- `src/pptx/parts/presentation.py` — lazyproperties to expose them. +- `src/pptx/package.py` — `custom_properties` lazyproperty. +- `src/pptx/presentation.py` — `custom_properties` and `custom_xml_parts` + properties. +- Tests: `tests/test_custom_properties.py`, `tests/test_custom_xml.py`, + `tests/test_presentation.py` additions. + +**Deliverable:** end-to-end usage works against the synthetic test fixtures. + +### Phase 4 — String-blob helper and integration tests + +- `add_string_blob` / `read_string_blob` on `CustomXmlParts`. +- `tests/test_files/customxml/` fixture set (§5.3). +- `tests/integration/test_customxml_roundtrip.py` — end-to-end open-save-reopen. + +**Deliverable:** the immediate use case (markdown blob round-trip) is +exercisable from a CLI. + +### Phase 5 — Documentation and release + +- `docs/user/custom-xml.rst`, `docs/dev/analysis/customxml.rst`, API ref pages. +- `HISTORY.rst` entry, `pyproject.toml` version bump (e.g. `1.1.0` → `1.2.0`). +- Manual PowerPoint UI matrix (§5.4) executed and recorded in the PR + description. +- Tag and publish (matches the trusted-publishing workflow on the current + branch). + +**Deliverable:** PR ready for principal review. + +### Critical-path dependencies + +``` +Phase 1 ──▶ Phase 2 ──▶ Phase 3 ──▶ Phase 4 ──▶ Phase 5 + (Phase 2 depends on Phase 1's element classes) + (Phase 3 depends on Phase 2's parts) + (Phase 4 depends on Phase 3's public API) +``` + +Phases 1–3 are testable in isolation. Phase 4's fixtures need real third-party +PPTX files, which is why integration tests come last (and partially gate manual +verification, §5.4). + +--- + +## 8. 
Open questions and decisions for the principal + +Numbered for easy reference. Defaults shown so that, if the principal is +indifferent, the plan is unblocked. + +**Q1. `add_item(tag, text, **attrs)` shim?** +docx-oss exposes this convenience on `CustomXmlPart`. Useful for "flat list of +items" callers; redundant for callers using `.element` directly. +*Default:* **include it** — low cost, parity with docx-oss, keeps the +"learn-once" surface across the python-openxml family. + +**Q2. Distribution name and version.** +Per `Plans/review-the-guide-at-swift-kahn.md`, the fork ships as +`python-pptx-extended` on PyPI. This feature warrants a minor bump (`1.1.0` → +`1.2.0`). +*Default:* **`python-pptx-extended==1.2.0`.** Confirm if you'd rather batch this +with other unreleased fork features instead of a dedicated release. + +**Q3. Custom-name storage mechanism (§3.4).** +*Default:* **store names as a reserved custom document property keyed by +datastoreItem GUID** (`_pptx_customxml_name_<GUID>`). Lossless, round-trips +through PowerPoint. +*Alternative:* skip name-based lookup entirely; require the caller to track +GUIDs themselves. This is what docx-oss does (no `by_name` on +`custom_xml_parts`). Smaller API, but the markdown-round-trip CLI use case +clearly wants a name. + +**Q4. Should `CustomXmlPart` register against `CT.XML`?** +*Default:* **No** — leave `CT.XML` mapping to base `Part` and let the +`CustomXmlParts` collection wrap-on-enumerate (§3.6). Avoids accidentally +upgrading unrelated `application/xml` parts. +*Alternative:* register it and accept the broader scope. Easier loader code, +risk of false positives. + +**Q5. Byte-exact preservation of files we do not modify.** +The integration test plan compares payloads, not byte streams (§5.3 test 3). +lxml re-serialization can change attribute order, whitespace, and XML +declaration form even when content is identical. 
+*Default:* **assert content equivalence (parsed XML trees equal), not byte +equivalence.** Match scanny upstream's posture. +*Alternative:* invest in a custom serializer that preserves original byte form +for unmodified parts. Significant scope creep; not recommended for this PR. + +**Q6. `Slide.custom_xml_parts` / `Shape.custom_xml_parts`.** +*Default:* **out of scope for this PR** (§4.3). Will be a follow-up that +covers `<p:custDataLst>` and the slide-rels-rooted topology — issue +#578. +Confirm you agree with deferring this; if you'd rather have one big PR, the +estimate roughly doubles in size (more parts, slide-rels handling, per-shape +API design questions). + +**Q7. License headers / attribution to docx-oss.** +The pattern is original to docx-oss (BSD-licensed). MIT (this fork) is +compatible. +*Default:* **add a one-line attribution at the top of `custom_xml.py` and +`custom_properties.py`** noting the docx-oss inspiration with a URL. No code is +copied verbatim; only the API shape is borrowed. + +**Q8. Versioning of the generated XML (e.g. provenance metadata about +*python-pptx-extended* itself).** +Some tools stamp the output with a generator hint (e.g. +``). Easy to add to the +string-blob envelope. +*Default:* **don't add this.** Keep the helper minimal; callers who want +provenance write it themselves. + +--- + +## 9. Summary of scope boundaries + +**In scope (this PR):** + +- Read/write/create custom document properties (`/docProps/custom.xml`). +- Read/write/create customXml data parts (`/customXml/itemN.xml` + + `/customXml/itemPropsN.xml`) at presentation scope (default) or package + scope (override). +- String-blob helper for the immediate use case. +- Round-trip safety with files written by SharePoint, Office.js, and VSTO. +- Documentation matching the project's user-guide and analysis-page styles. + +**Out of scope (this PR — explicit):** + +- Per-slide custom data (`<p:custDataLst>`) — issue #578, follow-up PR. +- Per-shape custom data — same follow-up. 
+- Office.js-style schema validation (we accept `schema_refs` as opaque URIs + but do not validate payloads against any schema). +- Content controls / structured document tags (SDT) bound to customXml — that's + a wordprocessingML feature anyway. +- Byte-perfect preservation of files we do not modify (Q5). +- Auto-encoding for `add_string_blob(encoding="base64")` — caller pre-encodes. +- Cross-filesystem name uniqueness checks — `add(name="x")` does not raise if + another part already has name `"x"`; the principal manages namespacing. + +--- + +## 10. Acceptance — what the principal sees on PR open + +1. Branch `feature/customxml` against `ci/pypi-trusted-publishing` (or + wherever the principal points). +2. ~30 new test cases, ~95% line coverage on the new modules. +3. ~4 fixtures under `tests/test_files/customxml/` documenting the third-party + topologies. +4. `docs/user/custom-xml.rst` + `docs/dev/analysis/customxml.rst` rendering on + ReadTheDocs. +5. PR description with the manual-verification matrix from §5.4 filled in. +6. `HISTORY.rst` entry under a new `1.2.0` heading. +7. Examples in the user guide that exercise both the immediate (markdown blob) + and the general (arbitrary XML) cases. + +--- + +*Plan author: Athena, on behalf of Matthew Horoszowski. Awaiting principal +approval before Phase 1 begins.* diff --git a/README.rst b/README.rst index 24d657b37..f3e958de3 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,31 @@ +Fork notice +----------- + +This distribution, ``python-pptx-extended``, is a fork of +`scanny/python-pptx`_ at upstream version 1.0.2. The import name is unchanged +(``import pptx``), so existing user code continues to work. The fork adds the +following features on top of upstream: + +- Full shadow effect API on ``ShadowFormat`` (outer/inner/preset shadows). +- Bullet and numbered list formatting on paragraphs. +- Per-edge border styling for table cells. +- ``cap_style`` and ``join_style`` properties on ``LineFormat``. 
+- Line-end shape types (arrow / triangle / oval / etc.). +- OOXML customXml support — ``Presentation.custom_properties`` (Mapping over + custom document properties) and ``Presentation.custom_xml_parts`` + (Sequence over customXml data parts), supporting both + presentation-scoped (Office.js default) and package-scoped (VSTO / + SharePoint) topologies. + +Because the import package name (``pptx``) is shared with the upstream +distribution, ``python-pptx`` and ``python-pptx-extended`` cannot be installed +into the same environment — install one or the other. + +.. _`scanny/python-pptx`: https://github.com/scanny/python-pptx + +About python-pptx +----------------- + *python-pptx* is a Python library for creating, reading, and updating PowerPoint (.pptx) files. diff --git a/docs/dev/analysis/customxml.rst b/docs/dev/analysis/customxml.rst new file mode 100644 index 000000000..bbe4214e8 --- /dev/null +++ b/docs/dev/analysis/customxml.rst @@ -0,0 +1,190 @@ +.. _CustomXml: + +CustomXml and Custom Document Properties +========================================= + +Two distinct OOXML mechanisms support embedding application-specific data in +a ``.pptx`` package: + +1. **Custom Document Properties** at ``/docProps/custom.xml`` — visible in + PowerPoint UI under *File → Properties → Advanced*. ECMA-376 Part 1 §15.2.12. +2. **CustomXml data parts** at ``/customXml/itemN.xml`` paired with + ``/customXml/itemPropsN.xml`` — hidden from end users; the mechanism + Office.js, SharePoint workflows, and VSTO add-ins use to embed structured + data. ECMA-376 Part 1 §15.2.4. + + +Custom Document Properties +-------------------------- + +XML specimen +~~~~~~~~~~~~ + +.. highlight:: xml + +:: + + + + + deck-builder-cli@1.4.2 + + + 42 + + + true + + + 2026-05-05T14:00:00Z + + + +Notable details +~~~~~~~~~~~~~~~ + +* The ``fmtid`` attribute is the same well-known GUID + ``{D5CDD505-2E9C-101B-9397-08002B2CF9AE}`` for every user-defined property. 
+ Office uses different FMTIDs for system-defined property sets (e.g. SharePoint + fields), but |pp| writes the user-defined FMTID exclusively. +* ``pid`` values 0 and 1 are reserved by the spec; user properties start at 2. + |pp| auto-assigns the next free integer ≥ 2 within the part. +* The typed value child belongs to the ``vt:`` namespace + (``http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes``). + Five types are supported: ``lpwstr`` (Unicode string), ``i4`` (32-bit signed + int), ``r8`` (IEEE-754 double), ``bool``, and ``filetime`` + (ISO-8601 UTC, ``Z``-suffixed). + + +CustomXml data parts +-------------------- + +Each customXml entry is a **pair** of parts: one for the user's arbitrary XML +payload and one for the metadata about it. + +XML specimen — data part +~~~~~~~~~~~~~~~~~~~~~~~~ + +The data part at ``/customXml/item1.xml`` carries arbitrary XML the application +chose to embed. The root element name and namespace are caller-defined:: + + + + deck-builder-cli + 2026-05-05T14:00:00Z + + +The content type is ``application/xml`` — the OPC default for the ``xml`` +extension, so no per-part Override entry is written into ``[Content_Types].xml``. + +XML specimen — itemProps part +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The sibling at ``/customXml/itemProps1.xml`` carries the ``datastoreItem`` GUID +that uniquely identifies the data part across edits, plus an optional +``schemaRefs`` list declaring the namespaces the data part claims to conform +to:: + + + + + + + + +Content type ``application/vnd.openxmlformats-officedocument.customXmlProperties+xml`` +is written as an Override entry for this partname. 
+ +Relationship topology +~~~~~~~~~~~~~~~~~~~~~ + +The data part's relationship can be rooted in either the package or the +presentation:: + + PRESENTATION-SCOPED (default; what Office.js writes) + ──────────────────────────────────────────────────── + /ppt/_rels/presentation.xml.rels + └─ Type=customXml ─▶ /customXml/item1.xml + └─ /customXml/_rels/item1.xml.rels + └─ Type=customXmlProps ─▶ /customXml/itemProps1.xml + + + PACKAGE-SCOPED (VSTO / SharePoint topology) + ─────────────────────────────────────────── + /_rels/.rels + └─ Type=customXml ─▶ /customXml/item1.xml + └─ /customXml/_rels/item1.xml.rels + └─ Type=customXmlProps ─▶ /customXml/itemProps1.xml + +The two scopes are not interchangeable — Office.js's ``customXmlParts`` +collection only enumerates presentation-scoped parts (see this +`Microsoft Q&A response +`_). + +|pp| defaults to presentation-scoped to match Office.js. The +``scope="package"`` parameter on +:meth:`pptx.custom_xml.CustomXmlParts.add` is the override hatch for VSTO / +SharePoint compatibility. + + +Design decisions +---------------- + +The ``application/xml`` content-type ambiguity +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``PartFactory.part_type_for`` keys on content type alone, but ``application/xml`` +is the catch-all default for the ``xml`` extension — every customXml data part +shares it with potentially-unrelated XML parts in third-party packages. + +|pp| chooses to **not** register :class:`CustomXmlPart` against ``application/xml``. +Loaded data parts arrive as base ``Part`` instances; the +:class:`CustomXmlParts` collection upgrades them to :class:`CustomXmlPart` +in-place via ``__class__`` swap on first enumeration. This avoids accidentally +promoting unrelated ``application/xml`` parts in third-party packages. + +The custom-name storage convention +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +OOXML does not define a "name" attribute on customXml parts. 
To support +``custom_xml_parts.by_name("provenance")``, |pp| stores user-assigned names +as reserved entries in the custom document properties part keyed by the +data part's ``datastoreItem`` GUID: + +:: + + + provenance + + +This is lossless, round-trips through PowerPoint, and requires no schema +invention. The reserved entries are visible in PowerPoint's +*File → Properties → Advanced* UI by design — what the user sees in the app +matches what the Python API exposes. + +Round-trip safety with third-party tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +PowerPoint 365 (Mac and Windows) preserves both topologies across edits. +LibreOffice historically preserves package-scoped parts but is less +consistent with presentation-scoped data parts. OnlyOffice / DocumentServer +strips customXml on save in some versions +(`OnlyOffice issue #1564 `_). + +|pp| preserves any customXml part it loads, including those it did not +author — files saved by SharePoint, Office.js, or VSTO add-ins load and save +without losing their customXml content. + + +References +---------- + +* `ECMA-376 Part 1, §15.2.4 — Custom XML Data Storage Part `_ +* `ECMA-376 Part 1, §15.2.12 — Custom File Properties Part `_ +* `MS Q&A on presentation- vs. 
package-scoped customXml topology `_ +* `Office.js CustomXmlPart API `_ +* `python-docx-oss custom-xml docs `_ (the docx-equivalent pattern, which |pp|'s API mirrors) diff --git a/docs/dev/analysis/index.rst b/docs/dev/analysis/index.rst index 028b113b8..861f051ee 100644 --- a/docs/dev/analysis/index.rst +++ b/docs/dev/analysis/index.rst @@ -143,6 +143,7 @@ Package :maxdepth: 1 pkg-coreprops + customxml enumerations diff --git a/docs/index.rst b/docs/index.rst index 79ad6c369..b0a77b12c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -65,6 +65,7 @@ User Guide user/charts user/table user/notes + user/custom-xml user/use-cases user/concepts diff --git a/docs/user/custom-xml.rst b/docs/user/custom-xml.rst new file mode 100644 index 000000000..0a6c7e6d1 --- /dev/null +++ b/docs/user/custom-xml.rst @@ -0,0 +1,259 @@ + +Custom XML and Custom Document Properties +========================================== + +PowerPoint .pptx packages support two distinct mechanisms for embedding +application-specific structured data alongside slide content: + +* **Custom Document Properties** — name/value pairs visible in PowerPoint's UI + under *File → Properties → Advanced*. Useful for human-readable metadata + like a build number, source identifier, or workflow status flag. +* **CustomXml data parts** — arbitrary XML payloads with a caller-defined + namespace. Hidden from end users but preserved by PowerPoint across saves. + This is the mechanism Office.js, SharePoint, and VSTO add-ins use to attach + structured application data (provenance, template parameters, audit trails, + AI-generation markers, etc.). + +|pp| exposes both as live, dict-like and sequence-like surfaces on +|Presentation|. + +This is a fork-only feature: it is not currently available in upstream +``python-pptx``. See ``Plans/customxml-implementation-plan.md`` in the +repository for the full design rationale. 
+ + +Custom Document Properties +-------------------------- + +The :attr:`Presentation.custom_properties` attribute is a Mapping wrapper +around the package's ``/docProps/custom.xml`` part. Read and write it like +a Python ``dict``: + +.. code-block:: python + + from pptx import Presentation + + prs = Presentation("input.pptx") + + prs.custom_properties["Source"] = "deck-builder-cli@1.4.2" + prs.custom_properties["BuildNumber"] = 42 + prs.custom_properties["IsDraft"] = True + + import datetime as dt + prs.custom_properties["GeneratedAt"] = dt.datetime.now(dt.timezone.utc) + + # Read back + print(prs.custom_properties["Source"]) # 'deck-builder-cli@1.4.2' + print("Source" in prs.custom_properties) # True + print(list(prs.custom_properties)) # ['Source', 'BuildNumber', ...] + + # Delete + del prs.custom_properties["IsDraft"] + + prs.save("output.pptx") + +Type dispatch on assignment is by Python type: + +============================ ======================== +Python type OOXML element +============================ ======================== +``str`` ``<vt:lpwstr>`` +``bool`` ``<vt:bool>`` +``int`` ``<vt:i4>`` +``float`` ``<vt:r8>`` +``datetime.datetime`` ``<vt:filetime>`` (UTC) +============================ ======================== + +The well-known FMTID ``{D5CDD505-2E9C-101B-9397-08002B2CF9AE}`` is used for +every entry, and the ``pid`` attribute is auto-assigned (≥ 2) per Office's +convention. You don't need to think about either. + +For cases where Python's type inference does the wrong thing — for example, +you want the string ``"42"`` rather than the integer 42 — use the explicit +typed setters: + +.. 
code-block:: python + + prs.custom_properties.set_string("NumAsString", "42") + prs.custom_properties.set_int("Count", 42) + prs.custom_properties.set_float("Score", 3.14) + prs.custom_properties.set_bool("Flag", True) + prs.custom_properties.set_datetime("When", dt.datetime(2026, 1, 1)) + + +CustomXml data parts +-------------------- + +The :attr:`Presentation.custom_xml_parts` attribute is a sequence-like +collection of customXml data parts attached to the package. Each entry is a +:class:`CustomXmlPart` carrying the user's arbitrary XML payload plus a +sibling :class:`CustomXmlPropertiesPart` carrying the part's ``datastoreItem`` +GUID and any declared ``schemaRef`` URIs. + +Adding a part +~~~~~~~~~~~~~ + +.. code-block:: python + + from pptx import Presentation + + prs = Presentation("input.pptx") + + prs.custom_xml_parts.add( + b''' + + deck-builder-cli + 2026-05-05T14:00:00Z + ''', + name="provenance", + schema_refs=["urn:my-app:provenance"], + ) + + prs.save("output.pptx") + +The ``xml`` argument can be ``bytes``, ``str``, or an existing lxml +``_Element``. The ``name`` is an application-assigned label stored in the +custom document properties (under a reserved ``_pptx_customxml_name_*`` key); +it is what :meth:`by_name` looks up. + +Lookup +~~~~~~ + +.. code-block:: python + + prs.custom_xml_parts[0] # by index + prs.custom_xml_parts["item3.xml"] # by partname tail + prs.custom_xml_parts.by_guid("{1A2B3C...}") # by datastoreItem GUID + prs.custom_xml_parts.by_name("provenance") # by user-assigned name + +GUID matching is case-insensitive and tolerates the ``{...}`` braces being +present or absent. + +Mutation +~~~~~~~~ + +Each :class:`CustomXmlPart` exposes the live lxml root via ``.element``; +mutating its children mutates the part: + +.. 
code-block:: python + + part = prs.custom_xml_parts.by_name("provenance") + source = part.element.find("{urn:my-app:provenance}source") + source.text = "deck-builder-cli@1.4.3" + + prs.save("output.pptx") + +To replace the whole payload: + +.. code-block:: python + + part.replace_xml(b'') + +For the common "flat list of items" shape, ``add_item(tag, text, **attrs)`` is +a one-liner: + +.. code-block:: python + + part.add_item("entry", "value", category="meta") + +Removal +~~~~~~~ + +.. code-block:: python + + prs.custom_xml_parts.remove(prs.custom_xml_parts.by_name("provenance")) + # or + prs.custom_xml_parts.remove(0) # by index + prs.custom_xml_parts.remove("item1.xml") # by partname tail + +``remove`` is idempotent — removing the same part twice is a silent no-op. + +The string-blob convenience helper +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For the common case of "stash this string verbatim, give it back to me on +read," |pp| provides a one-shot helper that wraps the string in a reserved +envelope element: + +.. code-block:: python + + prs.custom_xml_parts.add_string_blob( + "readme", + "# Hello\n\nThis is markdown content embedded in the .pptx.", + mime_hint="text/markdown", + ) + + # Read back later + content = prs.custom_xml_parts.read_string_blob("readme") + +For binary content, base64-encode at the call site and pass +``encoding="base64"`` so the encoding round-trips: + +.. code-block:: python + + import base64 + payload = base64.b64encode(some_bytes).decode("ascii") + prs.custom_xml_parts.add_string_blob( + "binary", payload, encoding="base64", mime_hint="application/zip" + ) + +The helper does NOT auto-encode for you — encoding is the caller's +responsibility. + + +Relationship topology — presentation vs. package scope +------------------------------------------------------ + +OOXML allows a customXml part's relationship to be rooted in either of two +places: + +* **Presentation-scoped** — the rel lives in + ``ppt/_rels/presentation.xml.rels``. 
This is what Office.js's + ``addCustomXmlPart`` writes and what PowerPoint's UI surfaces. +* **Package-scoped** — the rel lives in ``_rels/.rels`` (the package root). + This is the topology VSTO add-ins and SharePoint workflows historically use. + +Office.js's ``customXmlParts`` API only enumerates presentation-scoped parts, +so |pp| defaults to that. To match the VSTO/SharePoint topology, pass +``scope="package"``: + +.. code-block:: python + + prs.custom_xml_parts.add(b"", name="vsto", scope="package") + +The two scopes are not freely interchangeable — once a part is written at one +scope, |pp| preserves that scope on subsequent saves. You can move a part +between scopes by removing and re-adding it. + + +Round-trip safety +----------------- + +Modern PowerPoint preserves customXml parts across saves, including parts +your code did not author. Some other applications behave differently: + +* **PowerPoint 365 (Mac and Windows)**: preserves both presentation-scoped + and package-scoped customXml across edit/save. +* **LibreOffice**: historically preserves package-scoped customXml; behavior + with presentation-scoped parts is less consistent. +* **OnlyOffice / DocumentServer**: some versions strip customXml on save — + see `OnlyOffice/DocumentServer issue #1564 + `_. + +If your workflow must survive a round-trip through one of these tools, test +with the actual tool before relying on it. + +|pp| itself preserves any customXml parts it loads, including those it did +not author — files saved by SharePoint, Office.js, or VSTO load and save +without losing their customXml content. + + +Choosing between custom properties and customXml parts +------------------------------------------------------ + +* **Use custom document properties** for small, named, human-readable values + the user might inspect in PowerPoint's UI. +* **Use customXml parts** for structured data, larger payloads, schema-bound + XML, or anything you don't want surfaced to end users. 
+ +The two mechanisms can coexist — a single .pptx can use both. diff --git a/pyproject.toml b/pyproject.toml index 400cb6bfd..2da7a2681 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,8 +3,9 @@ requires = ["setuptools>=61.0.0"] build-backend = "setuptools.build_meta" [project] -name = "python-pptx" +name = "python-pptx-extended" authors = [{name = "Steve Canny", email = "stcanny@gmail.com"}] +maintainers = [{name = "Matthew Horoszowski", email = "matthew.horoszowski@gmail.com"}] classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", @@ -27,7 +28,7 @@ dependencies = [ "lxml>=3.1.0", "typing_extensions>=4.9.0", ] -description = "Create, read, and update PowerPoint 2007+ (.pptx) files." +description = "Fork of python-pptx with additional formatting features (shadows, bullets, table borders, line caps/joins, line end types) and OOXML customXml support (custom doc properties, customXml data parts)." dynamic = ["version"] keywords = ["powerpoint", "ppt", "pptx", "openxml", "office"] license = { text = "MIT" } @@ -35,10 +36,11 @@ readme = "README.rst" requires-python = ">=3.8" [project.urls] -Changelog = "https://github.com/scanny/python-pptx/blob/master/HISTORY.rst" +Changelog = "https://github.com/MHoroszowski/python-pptx/blob/master/HISTORY.rst" Documentation = "https://python-pptx.readthedocs.io/en/latest/" -Homepage = "https://github.com/scanny/python-pptx" -Repository = "https://github.com/scanny/python-pptx" +Homepage = "https://github.com/MHoroszowski/python-pptx" +Repository = "https://github.com/MHoroszowski/python-pptx" +Upstream = "https://github.com/scanny/python-pptx" [tool.black] line-length = 100 @@ -70,6 +72,11 @@ filterwarnings = [ "ignore::DeprecationWarning:xdist", # -- pytest complains when pytest-xdist is not installed -- "ignore:Unknown config option. 
looponfailroots:pytest.PytestConfigWarning", + # -- pyparsing 2.x triggers sre_constants deprecation on Python 3.11+ -- + "ignore:module 'sre_constants' is deprecated:DeprecationWarning", + # -- pyparsing 3.x deprecates lowerCamelCase shims (delimitedList etc.) used by tests/unitutil/cxml.py -- + "ignore::DeprecationWarning:pyparsing", + "ignore::DeprecationWarning:tests.unitutil.cxml", ] looponfailroots = [ @@ -122,7 +129,7 @@ select = [ "PLR0402", # -- Name compared with itself like `foo == foo` -- "PT", # -- flake8-pytest-style -- "SIM", # -- flake8-simplify -- - "TCH001", # -- detect typing-only imports not under `if TYPE_CHECKING` -- + "TC001", # -- detect typing-only imports not under `if TYPE_CHECKING` (formerly TCH001) -- "UP015", # -- redundant `open()` mode parameter (like "r" is default) -- "UP018", # -- Unnecessary {literal_type} call like `str("abc")`. (rewrite as a literal) -- "UP032", # -- Use f-string instead of `.format()` call -- @@ -132,7 +139,6 @@ select = [ ignore = [ "COM812", # -- over aggressively insists on trailing commas where not desireable -- "PT001", # -- wants empty parens on @pytest.fixture where not used (essentially always) -- - "PT005", # -- flags mock fixtures with names intentionally matching private method name -- "PT011", # -- pytest.raises({exc}) too broad, use match param or more specific exception -- "PT012", # -- pytest.raises() block should contain a single simple statement -- "SIM117", # -- merge `with` statements for context managers that have same scope -- diff --git a/requirements-test.txt b/requirements-test.txt index 9ddd60fd7..9d9b952ad 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,6 @@ -r requirements.txt behave>=1.2.3 -pyparsing>=2.0.1 +pyparsing>=2.0.1,<3 pytest>=2.5 pytest-coverage pytest-xdist diff --git a/requirements.txt b/requirements.txt index edbb0e25c..c613dd0a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ flake8>=2.0 lxml>=3.1.0 mock>=1.0.1 
Pillow>=3.3.2 -pyparsing>=2.0.1 +pyparsing>=2.0.1,<3 pytest>=2.5 XlsxWriter>=0.5.7 diff --git a/src/pptx/__init__.py b/src/pptx/__init__.py index fb5c2d7e4..7b8c9f52c 100644 --- a/src/pptx/__init__.py +++ b/src/pptx/__init__.py @@ -11,6 +11,8 @@ from pptx.opc.package import PartFactory from pptx.parts.chart import ChartPart from pptx.parts.coreprops import CorePropertiesPart +from pptx.parts.custom_properties import CustomPropertiesPart +from pptx.parts.custom_xml import CustomXmlPropertiesPart from pptx.parts.image import ImagePart from pptx.parts.media import MediaPart from pptx.parts.presentation import PresentationPart @@ -25,7 +27,7 @@ if TYPE_CHECKING: from pptx.opc.package import Part -__version__ = "1.0.2" +__version__ = "1.2.0" sys.modules["pptx.exceptions"] = exceptions del sys @@ -38,6 +40,12 @@ CT.PML_TEMPLATE_MAIN: PresentationPart, CT.PML_SLIDESHOW_MAIN: PresentationPart, CT.OPC_CORE_PROPERTIES: CorePropertiesPart, + CT.OFC_CUSTOM_PROPERTIES: CustomPropertiesPart, + CT.OFC_CUSTOM_XML_PROPERTIES: CustomXmlPropertiesPart, + # NOTE: CT.XML is intentionally NOT mapped to CustomXmlPart — see + # `Plans/customxml-implementation-plan.md` §3.6. The Phase-3 + # `CustomXmlParts` collection wraps loaded base `Part` instances + # at enumeration time. CT.PML_NOTES_MASTER: NotesMasterPart, CT.PML_NOTES_SLIDE: NotesSlidePart, CT.PML_SLIDE: SlidePart, @@ -71,6 +79,8 @@ del ( ChartPart, CorePropertiesPart, + CustomPropertiesPart, + CustomXmlPropertiesPart, ImagePart, MediaPart, SlidePart, diff --git a/src/pptx/chart/axis.py b/src/pptx/chart/axis.py index a9b877039..010698726 100644 --- a/src/pptx/chart/axis.py +++ b/src/pptx/chart/axis.py @@ -51,9 +51,7 @@ def has_major_gridlines(self): causes major gridlines to be displayed. Assigning |False| causes them to be removed. 
""" - if self._element.majorGridlines is None: - return False - return True + return self._element.majorGridlines is not None @has_major_gridlines.setter def has_major_gridlines(self, value): @@ -70,9 +68,7 @@ def has_minor_gridlines(self): causes minor gridlines to be displayed. Assigning |False| causes them to be removed. """ - if self._element.minorGridlines is None: - return False - return True + return self._element.minorGridlines is not None @has_minor_gridlines.setter def has_minor_gridlines(self, value): @@ -89,9 +85,7 @@ def has_title(self): causes an axis title to be added if not already present. Assigning |False| causes any existing title to be deleted. """ - if self._element.title is None: - return False - return True + return self._element.title is not None @has_title.setter def has_title(self, value): @@ -232,7 +226,7 @@ def visible(self): delete = self._element.delete_ if delete is None: return False - return False if delete.val else True + return not delete.val @visible.setter def visible(self, value): @@ -267,9 +261,7 @@ def has_text_frame(self): already present. Assigning |False| causes any existing text frame to be removed along with any text contained in the text frame. """ - if self._title.tx_rich is None: - return False - return True + return self._title.tx_rich is not None @has_text_frame.setter def has_text_frame(self, value): @@ -441,9 +433,8 @@ def crosses(self): @crosses.setter def crosses(self, value): cross_xAx = self._cross_xAx - if value == XL_AXIS_CROSSES.CUSTOM: - if cross_xAx.crossesAt is not None: - return + if value == XL_AXIS_CROSSES.CUSTOM and cross_xAx.crossesAt is not None: + return cross_xAx._remove_crosses() cross_xAx._remove_crossesAt() if value == XL_AXIS_CROSSES.CUSTOM: diff --git a/src/pptx/chart/chart.py b/src/pptx/chart/chart.py index d73aa9338..629056636 100644 --- a/src/pptx/chart/chart.py +++ b/src/pptx/chart/chart.py @@ -115,9 +115,7 @@ def has_title(self): settings. 
""" title = self._chartSpace.chart.title - if title is None: - return False - return True + return title is not None @has_title.setter def has_title(self, value): @@ -229,9 +227,7 @@ def has_text_frame(self): already present. Assigning |False| causes any existing text frame to be removed along with its text and formatting. """ - if self._title.tx_rich is None: - return False - return True + return self._title.tx_rich is not None @has_text_frame.setter def has_text_frame(self, value): diff --git a/src/pptx/chart/data.py b/src/pptx/chart/data.py index ec6a61f31..c25ff9349 100644 --- a/src/pptx/chart/data.py +++ b/src/pptx/chart/data.py @@ -391,9 +391,7 @@ def are_dates(self): return False first_cat_label = self[0].label date_types = (datetime.date, datetime.datetime) - if isinstance(first_cat_label, date_types): - return True - return False + return bool(isinstance(first_cat_label, date_types)) @property def are_numeric(self): @@ -414,9 +412,7 @@ def are_numeric(self): # the caller's input. 
first_cat_label = self[0].label numeric_types = (Number, datetime.date, datetime.datetime) - if isinstance(first_cat_label, numeric_types): - return True - return False + return bool(isinstance(first_cat_label, numeric_types)) @property def depth(self): diff --git a/src/pptx/chart/datalabel.py b/src/pptx/chart/datalabel.py index af7cdf5c0..03c7f14e2 100644 --- a/src/pptx/chart/datalabel.py +++ b/src/pptx/chart/datalabel.py @@ -177,9 +177,7 @@ def has_text_frame(self): dLbl = self._dLbl if dLbl is None: return False - if dLbl.xpath("c:tx/c:rich"): - return True - return False + return bool(dLbl.xpath("c:tx/c:rich")) @has_text_frame.setter def has_text_frame(self, value): diff --git a/src/pptx/chart/plot.py b/src/pptx/chart/plot.py index 6e7235855..795af0b21 100644 --- a/src/pptx/chart/plot.py +++ b/src/pptx/chart/plot.py @@ -340,9 +340,7 @@ def _differentiate_line_chart_type(cls, plot): def has_line_markers(): matches = lineChart.xpath('c:ser/c:marker/c:symbol[@val="none"]') - if matches: - return False - return True + return not matches if has_line_markers(): return { @@ -370,9 +368,7 @@ def _differentiate_radar_chart_type(cls, plot): def noMarkers(): matches = radarChart.xpath("c:ser/c:marker/c:symbol") - if matches and matches[0].get("val") == "none": - return True - return False + return bool(matches and matches[0].get("val") == "none") if radar_style is None: return XL.RADAR @@ -391,9 +387,7 @@ def noLine(): def noMarkers(): symbols = scatterChart.xpath("c:ser/c:marker/c:symbol") - if symbols and symbols[0].get("val") == "none": - return True - return False + return bool(symbols and symbols[0].get("val") == "none") scatter_style = scatterChart.xpath("c:scatterStyle")[0].get("val") diff --git a/src/pptx/chart/xmlwriter.py b/src/pptx/chart/xmlwriter.py index 703c53dd5..22616f2ab 100644 --- a/src/pptx/chart/xmlwriter.py +++ b/src/pptx/chart/xmlwriter.py @@ -140,7 +140,7 @@ def pt_xml(self, values): in the overall data point sequence of the chart and is started at 
*offset*. """ - xml = (' \n').format(pt_count=len(values)) + xml = (f' \n') pt_tmpl = ( ' \n' diff --git a/src/pptx/custom_properties.py b/src/pptx/custom_properties.py new file mode 100644 index 000000000..5ca6fa00e --- /dev/null +++ b/src/pptx/custom_properties.py @@ -0,0 +1,135 @@ +"""User-facing wrapper around the Custom Document Properties part. + +Mapping-protocol surface that lets callers read and write the values exposed +under `File → Properties → Advanced` in PowerPoint as if they were a `dict`. +""" + +from __future__ import annotations + +import datetime as dt +from typing import TYPE_CHECKING, Iterator, Mapping, Union + +if TYPE_CHECKING: + from pptx.parts.custom_properties import CustomPropertiesPart + + +CustomPropertyValue = Union[str, int, float, bool, dt.datetime] + + +class CustomProperties(Mapping[str, CustomPropertyValue]): + """Dict-like read/write access to custom document properties. + + Returned by :attr:`pptx.presentation.Presentation.custom_properties`. The + mapping is *live* — writes go directly to the underlying + `CustomPropertiesPart`; the next `Presentation.save(...)` persists them. + + Type dispatch on assignment is by Python type: + + ==================== =================== + Python type OOXML element + ==================== =================== + ``str`` ``<vt:lpwstr>`` + ``bool`` ``<vt:bool>`` + ``int`` ``<vt:i4>`` + ``float`` ``<vt:r8>`` + ``datetime.datetime`` ``<vt:filetime>`` + ==================== =================== + + For the cases where Python's type inference does the wrong thing — for + example, you want a string `"42"` rather than the integer 42 — use the + explicit :meth:`set_string` / :meth:`set_int` / etc. setters. 
+ """ + + def __init__(self, part: "CustomPropertiesPart"): + self._part = part + + # -- Mapping protocol -------------------------------------------------- + + def __getitem__(self, name: str) -> CustomPropertyValue: + prop = self._part.get_property(name) + if prop is None: + raise KeyError(name) + value = prop.value + if value is None: + # Defensive: a malformed entry with no child is treated as + # absent rather than surfacing None — keeps the Mapping contract clean. + raise KeyError(name) + return value + + def __setitem__(self, name: str, value: CustomPropertyValue) -> None: + if not _is_supported(value): + raise TypeError( + "custom property value must be bool, int, float, str, or datetime; " + "got %s" % type(value).__name__ + ) + existing = self._part.get_property(name) + if existing is not None: + existing.value = value + return + self._part.add_property(name, value) + + def __delitem__(self, name: str) -> None: + if not self._part.remove_property(name): + raise KeyError(name) + + def __contains__(self, name: object) -> bool: + return isinstance(name, str) and self._part.get_property(name) is not None + + def __iter__(self) -> Iterator[str]: + return iter(self._part.property_names) + + def __len__(self) -> int: + return len(self._part) + + # -- Explicit-typed setters -------------------------------------------- + + def set_string(self, name: str, value: str) -> None: + """Write `value` as `<vt:lpwstr>` regardless of Python type.""" + if not isinstance(value, str): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError("set_string value must be str, got %s" % type(value).__name__) + self._set_typed(name, value) + + def set_int(self, name: str, value: int) -> None: + """Write `value` as `<vt:i4>` regardless of Python type. + + Rejects `bool` even though `bool` is-a `int` in Python — callers who + really want a 1/0 i4 can wrap with `int(value)` first. 
+ """ + if isinstance(value, bool) or not isinstance(value, int): + raise TypeError("set_int value must be int, got %s" % type(value).__name__) + self._set_typed(name, value) + + def set_float(self, name: str, value: float) -> None: + """Write `value` as `<vt:r8>` regardless of Python type.""" + if isinstance(value, bool) or not isinstance(value, (int, float)): + raise TypeError("set_float value must be a number, got %s" % type(value).__name__) + self._set_typed(name, float(value)) + + def set_bool(self, name: str, value: bool) -> None: + """Write `value` as `<vt:bool>`.""" + if not isinstance(value, bool): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError("set_bool value must be bool, got %s" % type(value).__name__) + self._set_typed(name, value) + + def set_datetime(self, name: str, value: dt.datetime) -> None: + """Write `value` as `<vt:filetime>` (UTC, ISO-8601).""" + if not isinstance(value, dt.datetime): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + "set_datetime value must be datetime, got %s" % type(value).__name__ + ) + self._set_typed(name, value) + + def _set_typed(self, name: str, value: CustomPropertyValue) -> None: + """Replace-or-add the property; the underlying `CT_Property.value` setter + already dispatches on Python type cleanly, so re-using it here is safe.""" + existing = self._part.get_property(name) + if existing is not None: + existing.value = value + return + self._part.add_property(name, value) + + +def _is_supported(value: object) -> bool: + if isinstance(value, bool): + return True + return isinstance(value, (int, float, str, dt.datetime)) diff --git a/src/pptx/custom_xml.py b/src/pptx/custom_xml.py new file mode 100644 index 000000000..80ecbf8d7 --- /dev/null +++ b/src/pptx/custom_xml.py @@ -0,0 +1,337 @@ +"""User-facing wrapper around customXml data parts. + +`CustomXmlParts` exposes the collection of ``<ds:datastoreItem>``-tagged +arbitrary-XML parts attached to a presentation. 
The user-facing element type +is :class:`pptx.parts.custom_xml.CustomXmlPart` itself — there is no separate +facade. Loaded base `Part` instances (which arise because `CT.XML` is not +mapped to `CustomXmlPart` in `pptx/__init__.py` per plan §3.6) are upgraded +in-place by `_upgrade_to_custom_xml_part(...)` on first enumeration. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Iterable, Iterator, Literal, Sequence, Union, cast + +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from pptx.oxml import parse_xml +from pptx.parts.custom_xml import CustomXmlPart, XmlPayload + +if TYPE_CHECKING: + from pptx.opc.package import Part + from pptx.oxml.xmlchemy import BaseOxmlElement + from pptx.parts.presentation import PresentationPart + + +# Reserved name-prefix used to store user-assigned customXml part names as +# entries in the custom document properties part. The key is +# `{prefix}{datastore_item_id}` and the value is the user-assigned name. +NAME_PROPERTY_PREFIX = "_pptx_customxml_name_" + +# Reserved namespace for the string-blob envelope written by `add_string_blob`. +# Read back through `read_string_blob` only — callers using `add(...)` directly +# should pick their own namespace, not this one. +BLOB_NAMESPACE = "urn:python-pptx:blob" + + +class CustomXmlParts(Sequence[CustomXmlPart]): + """Collection of customXml data parts attached to the presentation. + + Iterates both presentation-scoped (`ppt/_rels/presentation.xml.rels`) and + package-scoped (`/_rels/.rels`) `RT.CUSTOM_XML` relationships. Parts are + deduplicated by identity — a single part related from both sources appears + once. 
+ + Lookup: + + prs.custom_xml_parts[0] # by index + prs.custom_xml_parts["item3.xml"] # by partname tail + prs.custom_xml_parts.by_guid("{...}") # by datastoreItem GUID + prs.custom_xml_parts.by_name("provenance") # by user-assigned name + """ + + def __init__(self, presentation_part: "PresentationPart"): + self._presentation_part = presentation_part + + # -- Sequence-like protocol -------------------------------------------- + + def __iter__(self) -> Iterator[CustomXmlPart]: + return self._iter_parts() + + def __len__(self) -> int: + return sum(1 for _ in self._iter_parts()) + + def __getitem__(self, key): # type: ignore[override] + if isinstance(key, int): + for i, part in enumerate(self._iter_parts()): + if i == key: + return part + raise IndexError("custom_xml_parts index out of range: %d" % key) + if isinstance(key, str): + for part in self._iter_parts(): + partname = str(part.partname) + if partname == key or partname.endswith("/" + key): + return part + raise KeyError("no custom_xml part with partname %r" % key) + raise TypeError( + "custom_xml_parts key must be int or str, got %s" % type(key).__name__ + ) + + # -- Public lookups ---------------------------------------------------- + + def by_guid(self, guid: str) -> CustomXmlPart | None: + """Return the part whose `datastore_item_id` matches `guid`, or None. + + Match is case-insensitive and curly-brace-tolerant — `"{ABCD-...}"` and + `"abcd-..."` both find the same part. + """ + target = _normalize_guid(guid) + for part in self._iter_parts(): + if _normalize_guid(part.datastore_item_id) == target: + return part + return None + + def by_name(self, name: str) -> CustomXmlPart | None: + """Return the part previously added with `name=...`, or None. + + Names are stored as reserved entries in the custom document properties + part keyed by datastore_item_id; this method reverse-resolves the name + through that table. 
+ """ + if not isinstance(name, str): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError("name must be str, got %s" % type(name).__name__) + cp_part = self._presentation_part.package.custom_properties_part + for prop in cp_part._element.property_lst: + if not prop.name.startswith(NAME_PROPERTY_PREFIX): + continue + if prop.value != name: + continue + guid = prop.name[len(NAME_PROPERTY_PREFIX) :] + return self.by_guid(guid) + return None + + # -- Mutation ---------------------------------------------------------- + + def add( + self, + xml: XmlPayload, + *, + name: str | None = None, + datastoreItem_id: str | None = None, + schema_refs: Iterable[str] | None = None, + scope: Literal["presentation", "package"] = "presentation", + ) -> CustomXmlPart: + """Add a new customXml part with `xml` as its payload. + + See module docstring for parameter semantics. Returns the new part, + already attached to the presentation; nothing else is required before + `prs.save(...)`. + """ + if scope not in ("presentation", "package"): + raise ValueError( + "scope must be 'presentation' or 'package', got %r" % (scope,) + ) + + package = self._presentation_part.package + data_part = CustomXmlPart.new_pair( + package, + xml, + datastore_item_id=datastoreItem_id, + schema_refs=tuple(schema_refs) if schema_refs is not None else (), + ) + + if scope == "presentation": + self._presentation_part.relate_to(data_part, RT.CUSTOM_XML) + else: + package.relate_to(data_part, RT.CUSTOM_XML) + + if name is not None: + cp = package.custom_properties + cp[NAME_PROPERTY_PREFIX + data_part.datastore_item_id] = name + + return data_part + + def add_string_blob( + self, + name: str, + content: str, + *, + mime_hint: str | None = None, + encoding: Literal["text", "base64"] = "text", + scope: Literal["presentation", "package"] = "presentation", + ) -> CustomXmlPart: + """Embed a string payload as a customXml part. 
+ + Wraps `content` in a one-element XML envelope under the reserved + `urn:python-pptx:blob` namespace:: + + + + For binary or non-XML-safe text, set ``encoding="base64"`` and pass + already-encoded `content` — the helper does NOT encode for you. Read + back via :meth:`read_string_blob`. + + `mime_hint` is stored as the ``mime`` attribute on the envelope and + round-trips for the caller's reference; it has no effect on PowerPoint. + + Returns the created :class:`CustomXmlPart`. Already attached at the + chosen scope; nothing else is needed before ``prs.save(...)``. + """ + if not isinstance(name, str) or not name: + raise ValueError("name must be a non-empty string") + if not isinstance(content, str): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError("content must be str, got %s" % type(content).__name__) + if encoding not in ("text", "base64"): + raise ValueError( + "encoding must be 'text' or 'base64', got %r" % (encoding,) + ) + + from lxml import etree + + envelope = etree.Element("{%s}blob" % BLOB_NAMESPACE, nsmap={None: BLOB_NAMESPACE}) + envelope.set("name", name) + envelope.set("encoding", encoding) + if mime_hint is not None: + envelope.set("mime", mime_hint) + envelope.text = content + + return self.add(envelope, name=name, scope=scope) + + def read_string_blob(self, name: str) -> str | None: + """Return the string payload of the blob part `name`, or `None`. + + Locates the part via :meth:`by_name`. Returns `None` if no such part + exists or if the part is not a `urn:python-pptx:blob` envelope (i.e. + was added by some other API or tool). + + For ``encoding="base64"`` blobs, the still-encoded string is returned + — the caller decodes. The original encoding is recoverable from + :meth:`blob_encoding`. 
+ """ + part = self.by_name(name) + if part is None: + return None + root = part.element + if root.tag != "{%s}blob" % BLOB_NAMESPACE: + return None + return root.text or "" + + def blob_encoding(self, name: str) -> str | None: + """Return the `encoding` attribute of the blob part `name`, or `None`. + + Useful when a caller mixes text and base64 blobs and needs to decode + the latter on read. + """ + part = self.by_name(name) + if part is None: + return None + root = part.element + if root.tag != "{%s}blob" % BLOB_NAMESPACE: + return None + return root.get("encoding") + + def remove(self, part: Union[CustomXmlPart, int, str]) -> None: + """Remove a customXml part from the presentation. + + Drops the relationship from whichever scope (presentation or package) + currently holds it, plus any reserved name entry in custom_properties. + Idempotent — a second call on an already-removed part is a no-op. + + The data → props rel is intentionally LEFT IN PLACE on the now-orphaned + data part. Once the source rel is gone, neither the data part nor the + props part is reachable from `iter_parts`, so both are omitted on + save. Keeping the rel around lets a caller still read + `part.datastore_item_id` on the returned reference after removal, + which matches the principle of least surprise for held references. + """ + target = self._resolve(part) + if target is None: + return + + # Drop the reserved name entry, if any. Reading datastore_item_id + # here requires the data → props rel to still be intact. + cp_part = self._presentation_part.package.custom_properties_part + cp_part.remove_property(NAME_PROPERTY_PREFIX + target.datastore_item_id) + + # Drop the rel from whichever source holds it (presentation or package). 
+ for rels in self._iter_rel_collections(): + for rId, rel in list(rels.items()): + if rel.is_external or rel.reltype != RT.CUSTOM_XML: + continue + if rel.target_part is target: + rels.pop(rId) + + # -- Internals --------------------------------------------------------- + + def _iter_parts(self) -> Iterator[CustomXmlPart]: + """Yield each unique customXml data part across both rel sources.""" + seen: set[int] = set() + for rels in self._iter_rel_collections(): + for rel in rels.values(): + if rel.is_external or rel.reltype != RT.CUSTOM_XML: + continue + part = _upgrade_to_custom_xml_part(rel.target_part) + if id(part) in seen: + continue + seen.add(id(part)) + yield part + + def _iter_rel_collections(self): + """Yield the two relationship collections to scan for `RT.CUSTOM_XML`. + + Presentation part exposes `.rels` publicly; the package exposes the + same collection internally as `_rels` (it has no public API for + external rel inspection because most callers reach the rel graph via + `iter_parts`/`iter_rels` instead). We need direct rel access here to + find the source rel for `add(scope="package")`-attached parts. + """ + yield self._presentation_part.rels + yield self._presentation_part.package._rels + + def _resolve( + self, part: Union[CustomXmlPart, int, str] + ) -> CustomXmlPart | None: + if isinstance(part, CustomXmlPart): + return part + if isinstance(part, int): + try: + return self.__getitem__(part) + except IndexError: + return None + if isinstance(part, str): + try: + return self.__getitem__(part) + except KeyError: + return None + raise TypeError( + "remove() argument must be CustomXmlPart, int, or str; got %s" + % type(part).__name__ + ) + + +def _upgrade_to_custom_xml_part(part: Part) -> CustomXmlPart: + """Upgrade a base `Part` to `CustomXmlPart` in-place via `__class__` swap. + + Loaded `application/xml` parts come in as plain `Part` because plan §3.6 + intentionally leaves `CT.XML` unmapped. 
On first enumeration, we promote + the instance: assign the `CustomXmlPart` class, parse its blob to lxml, + and stash the parsed root in `_element`. The package's rel graph keeps + pointing at the same instance, so every other reference now resolves to + the upgraded class with no graph rewriting. + """ + if isinstance(part, CustomXmlPart): + return part + element = cast("BaseOxmlElement", parse_xml(part.blob)) + part.__class__ = CustomXmlPart + part._element = element # type: ignore[attr-defined] + return cast(CustomXmlPart, part) + + +def _normalize_guid(guid: str) -> str: + """Lowercase and strip surrounding curly braces for comparison.""" + if not isinstance(guid, str): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError("guid must be str, got %s" % type(guid).__name__) + s = guid.strip().lower() + if s.startswith("{") and s.endswith("}"): + s = s[1:-1] + return s diff --git a/src/pptx/dml/effect.py b/src/pptx/dml/effect.py index 7482de859..980abd3fe 100644 --- a/src/pptx/dml/effect.py +++ b/src/pptx/dml/effect.py @@ -100,9 +100,7 @@ def inherit(self): Assigning |False| causes the inheritance link to be broken and **no** effects to appear on the shape. """ - if self._element.effectLst is None: - return True - return False + return self._element.effectLst is None @inherit.setter def inherit(self, value): diff --git a/src/pptx/opc/serialized.py b/src/pptx/opc/serialized.py index 92366708b..1c99f2fee 100644 --- a/src/pptx/opc/serialized.py +++ b/src/pptx/opc/serialized.py @@ -44,7 +44,9 @@ def rels_xml_for(self, partname: PackURI) -> bytes | None: instance. """ blob_reader, uri = self._blob_reader, partname.rels_uri - return blob_reader[uri] if uri in blob_reader else None + # `_blob_reader` is a Container, not a Mapping — it has no `.get()`, + # so SIM401's "use blob_reader.get(uri, None)" rewrite would break. 
+ return blob_reader[uri] if uri in blob_reader else None # noqa: SIM401 @lazyproperty def _blob_reader(self) -> _PhysPkgReader: diff --git a/src/pptx/oxml/__init__.py b/src/pptx/oxml/__init__.py index 4fe208a50..fc216a493 100644 --- a/src/pptx/oxml/__init__.py +++ b/src/pptx/oxml/__init__.py @@ -217,6 +217,36 @@ def register_element_cls(nsptagname: str, cls: Type[BaseOxmlElement]): register_element_cls("cp:coreProperties", CT_CoreProperties) +from pptx.oxml.custom_properties import ( # noqa: E402 + CT_Properties, + CT_Property, + CT_VtBool, + CT_VtFiletime, + CT_VtI4, + CT_VtLpwstr, + CT_VtR8, +) + +register_element_cls("op:Properties", CT_Properties) +register_element_cls("op:property", CT_Property) +register_element_cls("vt:bool", CT_VtBool) +register_element_cls("vt:filetime", CT_VtFiletime) +register_element_cls("vt:i4", CT_VtI4) +register_element_cls("vt:lpwstr", CT_VtLpwstr) +register_element_cls("vt:r8", CT_VtR8) + + +from pptx.oxml.custom_xml import ( # noqa: E402 + CT_DatastoreItem, + CT_DatastoreSchemaRef, + CT_DatastoreSchemaRefs, +) + +register_element_cls("ds:datastoreItem", CT_DatastoreItem) +register_element_cls("ds:schemaRef", CT_DatastoreSchemaRef) +register_element_cls("ds:schemaRefs", CT_DatastoreSchemaRefs) + + from pptx.oxml.dml.color import ( # noqa: E402 CT_Color, CT_HslColor, diff --git a/src/pptx/oxml/chart/chart.py b/src/pptx/oxml/chart/chart.py index f4cd0dc7c..d2b7150bd 100644 --- a/src/pptx/oxml/chart/chart.py +++ b/src/pptx/oxml/chart/chart.py @@ -48,9 +48,7 @@ def has_legend(self): True if this chart has a legend defined, False otherwise. 
""" legend = self.legend - if legend is None: - return False - return True + return legend is not None @has_legend.setter def has_legend(self, bool_value): diff --git a/src/pptx/oxml/chart/plot.py b/src/pptx/oxml/chart/plot.py index 9c695a43a..855ed5903 100644 --- a/src/pptx/oxml/chart/plot.py +++ b/src/pptx/oxml/chart/plot.py @@ -62,9 +62,9 @@ def cat_pts(self): if not cat_pts: cat_pts = self.xpath("./c:ser[1]/c:cat//c:pt") - cat_pt_dict = dict((pt.idx, pt) for pt in cat_pts) + cat_pt_dict = {pt.idx: pt for pt in cat_pts} - return [cat_pt_dict.get(idx, None) for idx in range(self.cat_pt_count)] + return [cat_pt_dict.get(idx) for idx in range(self.cat_pt_count)] @property def grouping_val(self): diff --git a/src/pptx/oxml/custom_properties.py b/src/pptx/oxml/custom_properties.py new file mode 100644 index 000000000..ba60a6cba --- /dev/null +++ b/src/pptx/oxml/custom_properties.py @@ -0,0 +1,319 @@ +"""lxml custom element classes for the Custom Document Properties part. + +Models `/docProps/custom.xml` — the `` root and its `` +children, each carrying one of five typed `` value elements. + +Schema references: ECMA-376 Part 1, §15.2.12.2 (Custom File Properties Part). +""" + +from __future__ import annotations + +import datetime as dt +from typing import cast + +from pptx.oxml import parse_xml +from pptx.oxml.ns import nsdecls, qn +from pptx.oxml.simpletypes import XsdString, XsdUnsignedInt +from pptx.oxml.xmlchemy import ( + BaseOxmlElement, + RequiredAttribute, + ZeroOrMore, + ZeroOrOne, +) + +# Well-known FMTID Office writes on every user-defined custom property. +DEFAULT_FMTID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" + +# pid values 0 and 1 are reserved by the OOXML spec; user properties start at 2. +_FIRST_PID = 2 + +# Maximum string length for an lpwstr value. Office-tested limit; longer values +# round-trip but are reported by some inspectors as malformed. 
+_LPWSTR_MAX_LEN = 255 + + +class CT_Properties(BaseOxmlElement): + """`` element, root of `/docProps/custom.xml`. + + The xmlchemy declaration is named `prop` rather than `property` because the + latter would shadow Python's built-in `@property` decorator inside the + class body — see metaclass-walk in `xmlchemy.py:120-131`. Public methods + below preserve the `*_property` naming on the user-facing surface. + """ + + prop = ZeroOrMore("op:property", successors=()) + + _properties_tmpl = "\n" % nsdecls("op", "vt") + + @staticmethod + def new_properties() -> "CT_Properties": + """Return a new empty `` element with op + vt namespaces.""" + return cast("CT_Properties", parse_xml(CT_Properties._properties_tmpl)) + + @property + def property_lst(self) -> "list[CT_Property]": + """List of `` children in document order.""" + return cast("list[CT_Property]", self.prop_lst) + + def add_property(self, name: str, value: object) -> "CT_Property": + """Append a new `` child for `(name, value)`. + + The pid is auto-assigned to the next free integer ≥ 2 within this + collection. Dispatches `value` by Python type to choose the `` + child. Raises `TypeError` if `value` is not one of the supported types + (see `CT_Property.value` for the dispatch table). + """ + prop = cast("CT_Property", self._add_prop()) + prop.fmtid = DEFAULT_FMTID + prop.pid = self._next_pid() + prop.name = name + prop.value = value + return prop + + def get_property(self, name: str) -> "CT_Property | None": + """Return the `` child whose `name` attribute is `name`. + + Returns `None` if no such child exists. Match is case-sensitive — Office + treats names case-sensitively even though Windows file names elsewhere + do not. 
+ """ + for prop in self.property_lst: + if prop.name == name: + return prop + return None + + def remove_property(self, name: str) -> bool: + """Remove the `` child with `name`, returning True if found.""" + prop = self.get_property(name) + if prop is None: + return False + self.remove(prop) + return True + + @property + def property_names(self) -> tuple[str, ...]: + """Tuple of `name` attributes for every `` child, in order.""" + return tuple(p.name for p in self.property_lst) + + def _next_pid(self) -> int: + """Return the next free pid (≥ 2) not yet used by any child.""" + used = {p.pid for p in self.property_lst if p.has_pid} + candidate = _FIRST_PID + while candidate in used: + candidate += 1 + return candidate + + +class CT_Property(BaseOxmlElement): + """`` element — one custom document property entry.""" + + fmtid: str = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "fmtid", XsdString + ) + pid: int = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "pid", XsdUnsignedInt + ) + name: str = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "name", XsdString + ) + + lpwstr = ZeroOrOne("vt:lpwstr", successors=()) + i4 = ZeroOrOne("vt:i4", successors=()) + r8 = ZeroOrOne("vt:r8", successors=()) + bool_ = ZeroOrOne("vt:bool", successors=()) + filetime = ZeroOrOne("vt:filetime", successors=()) + + @property + def has_pid(self) -> bool: + """True if the `pid` attribute is present (it is required, but parsing + a malformed file can leave it unset; this guards `_next_pid` against + crashing on partial input).""" + return self.get("pid") is not None + + @property + def value(self) -> str | int | float | bool | dt.datetime | None: + """The Python-typed value of whichever `` child is present. + + Returns `None` if no value child exists (a malformed but tolerated state). + Order of precedence on read: lpwstr, i4, r8, bool, filetime — only one + is expected to be present per the spec. 
+ """ + for child in (self.lpwstr, self.i4, self.r8, self.bool_, self.filetime): + if child is not None: + return cast("_VtValueElement", child).value_typed + return None + + @value.setter + def value(self, new_value: object) -> None: + """Replace the current `<vt:*>` child with one matching `new_value`'s type. + + Dispatch table (bool checked BEFORE int because `bool` is a subclass of + `int` in Python): + + bool -> <vt:bool> + int -> <vt:i4> + float -> <vt:r8> + str -> <vt:lpwstr> + datetime.datetime -> <vt:filetime> + + Other types raise `TypeError`. + """ + # Remove any existing value child before adding the new one. + for tagname in ("vt:lpwstr", "vt:i4", "vt:r8", "vt:bool", "vt:filetime"): + for elem in self.findall(qn(tagname)): + self.remove(elem) + + if isinstance(new_value, bool): + child = cast("CT_VtBool", self.get_or_add_bool_()) + child.value_typed = new_value + elif isinstance(new_value, int): + child = cast("CT_VtI4", self.get_or_add_i4()) + child.value_typed = new_value + elif isinstance(new_value, float): + child = cast("CT_VtR8", self.get_or_add_r8()) + child.value_typed = new_value + elif isinstance(new_value, str): + child = cast("CT_VtLpwstr", self.get_or_add_lpwstr()) + child.value_typed = new_value + elif isinstance(new_value, dt.datetime): + child = cast("CT_VtFiletime", self.get_or_add_filetime()) + child.value_typed = new_value + else: + raise TypeError( + "custom property value must be bool, int, float, str, or datetime; " + "got %s" % type(new_value).__name__ + ) + + +class _VtValueElement(BaseOxmlElement): + """Mixin-style base for `<vt:*>` typed value elements. + + Subclasses define a `value_typed` property that round-trips the element's + text content to/from a Python value. 
class CT_VtR8(_VtValueElement):
    """`<vt:r8>` — IEEE-754 double-precision float value."""

    @property
    def value_typed(self) -> float:
        """The element text parsed as a Python `float`.

        Raises `ValueError` when the element has no text content or the text
        is not a valid float literal.
        """
        text = self.text
        if text is None:
            raise ValueError("vt:r8 element has no text content")
        return float(text)

    @value_typed.setter
    def value_typed(self, value: float) -> None:
        """Write `value` as the element text.

        Accepts `float` and `int` (converted to float); `bool` and any other
        type raise `TypeError`. Finite values are written with `repr()` so they
        round-trip exactly. Non-finite values are written using the XML Schema
        `double` spellings (`INF`, `-INF`, `NaN`) rather than Python's
        lowercase `inf`/`nan`; `float()` reads those spellings back unchanged.
        """
        import math

        if isinstance(value, bool):
            raise TypeError("vt:r8 value must be float, got bool")
        if not isinstance(value, (int, float)):
            raise TypeError("vt:r8 value must be a number, got %s" % type(value).__name__)

        number = float(value)
        if math.isnan(number):
            self.text = "NaN"
        elif math.isinf(number):
            self.text = "INF" if number > 0 else "-INF"
        else:
            self.text = repr(number)
def _parse_iso_utc(text: str) -> dt.datetime:
    """Parse `text` as ISO-8601, returning a naive UTC `datetime`.

    Accepts a trailing `Z` (or `z`) as well as an explicit `+HH:MM` offset.
    Offset-aware input is converted to UTC and the tzinfo is then dropped;
    input without an offset is returned as parsed (i.e. assumed to be UTC).

    Fractional seconds of any length are accepted: they are padded or
    truncated to microsecond precision before parsing, because
    `datetime.fromisoformat` on Python 3.9/3.10 only understands exactly 3 or
    6 fractional digits.

    Raises `ValueError` on unparsable input.
    """
    import re

    cleaned = text.strip()
    if cleaned[-1:] in ("Z", "z"):
        cleaned = cleaned[:-1] + "+00:00"

    # -- Normalize ".d+" following the seconds field to exactly six digits.
    cleaned = re.sub(
        r"(?<=:\d\d)\.(\d+)",
        lambda m: "." + m.group(1)[:6].ljust(6, "0"),
        cleaned,
        count=1,
    )

    parsed = dt.datetime.fromisoformat(cleaned)
    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(dt.timezone.utc).replace(tzinfo=None)
    return parsed
class CT_DatastoreSchemaRefs(BaseOxmlElement):
    """`<ds:schemaRefs>` — container for zero or more `<ds:schemaRef>` children."""

    schemaRef = ZeroOrMore("ds:schemaRef", successors=())

    def find_by_uri(self, uri: str) -> "CT_DatastoreSchemaRef | None":
        """Return the first `<ds:schemaRef>` child whose `ds:uri` equals `uri`.

        Children are examined in document order; `None` is returned when no
        child matches.
        """
        candidates = cast("list[CT_DatastoreSchemaRef]", self.schemaRef_lst)
        return next((child for child in candidates if child.uri == uri), None)
— a single XML namespace this customXml part conforms to.""" + + uri: str = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "ds:uri", XsdString + ) diff --git a/src/pptx/oxml/ns.py b/src/pptx/oxml/ns.py index d900c33bf..0cf0ea893 100644 --- a/src/pptx/oxml/ns.py +++ b/src/pptx/oxml/ns.py @@ -2,7 +2,6 @@ from __future__ import annotations - # -- Maps namespace prefix to namespace name for all known PowerPoint XML namespaces -- _nsmap = { "a": "http://schemas.openxmlformats.org/drawingml/2006/main", @@ -12,12 +11,14 @@ "dc": "http://purl.org/dc/elements/1.1/", "dcmitype": "http://purl.org/dc/dcmitype/", "dcterms": "http://purl.org/dc/terms/", + "ds": "http://schemas.openxmlformats.org/officeDocument/2006/customXml", "ep": "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties", "i": "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image", "m": "http://schemas.openxmlformats.org/officeDocument/2006/math", "mo": "http://schemas.microsoft.com/office/mac/office/2008/main", "mv": "urn:schemas-microsoft-com:mac:vml", "o": "urn:schemas-microsoft-com:office:office", + "op": "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties", "p": "http://schemas.openxmlformats.org/presentationml/2006/main", "pd": "http://schemas.openxmlformats.org/drawingml/2006/presentationDrawing", "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture", @@ -26,6 +27,7 @@ "sl": "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout", "v": "urn:schemas-microsoft-com:vml", "ve": "http://schemas.openxmlformats.org/markup-compatibility/2006", + "vt": "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes", "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main", "w10": "urn:schemas-microsoft-com:office:word", "wne": "http://schemas.microsoft.com/office/word/2006/wordml", diff --git a/src/pptx/oxml/shapes/groupshape.py b/src/pptx/oxml/shapes/groupshape.py index 
f62bc6662..4687b5511 100644 --- a/src/pptx/oxml/shapes/groupshape.py +++ b/src/pptx/oxml/shapes/groupshape.py @@ -194,7 +194,7 @@ def recalculate_extents(self) -> None: This method is recursive "upwards" since a change in a group shape can change the position and size of its containing group. """ - if not self.tag == qn("p:grpSp"): + if self.tag != qn("p:grpSp"): return x, y, cx, cy = self._child_extents diff --git a/src/pptx/oxml/table.py b/src/pptx/oxml/table.py index 09fc35284..3e44de35d 100644 --- a/src/pptx/oxml/table.py +++ b/src/pptx/oxml/table.py @@ -519,9 +519,7 @@ def dimensions(self) -> tuple[int, int]: @lazyproperty def in_same_table(self): """True if both cells provided to constructor are in same table.""" - if self._tc.tbl is self._other_tc.tbl: - return True - return False + return self._tc.tbl is self._other_tc.tbl def iter_except_left_col_tcs(self): """Generate each `a:tc` element not in leftmost column of range.""" diff --git a/src/pptx/oxml/text.py b/src/pptx/oxml/text.py index 5c72182ac..5324f7fe6 100644 --- a/src/pptx/oxml/text.py +++ b/src/pptx/oxml/text.py @@ -120,9 +120,7 @@ def is_empty(self) -> bool: if not ps: raise InvalidXmlError("p:txBody must have at least one a:p") - if ps[0].text != "": - return False - return True + return ps[0].text == "" @classmethod def new(cls): diff --git a/src/pptx/oxml/xmlchemy.py b/src/pptx/oxml/xmlchemy.py index 41fb2e171..d5eb62ccc 100644 --- a/src/pptx/oxml/xmlchemy.py +++ b/src/pptx/oxml/xmlchemy.py @@ -100,9 +100,7 @@ def _eq_elm_strs(self, line: str, line_2: str) -> bool: return False if close != close_2: return False - if text != text_2: - return False - return True + return text == text_2 def _parse_line(self, line: str): """Return front, attrs, close, text 4-tuple result of parsing XML element string `line`.""" @@ -456,10 +454,7 @@ def _prop_name(self): """ Calculate property name from tag name, e.g. a:schemeClr -> schemeClr. 
""" - if ":" in self._nsptagname: - start = self._nsptagname.index(":") + 1 - else: - start = 0 + start = self._nsptagname.index(":") + 1 if ":" in self._nsptagname else 0 return self._nsptagname[start:] @lazyproperty diff --git a/src/pptx/package.py b/src/pptx/package.py index 79703cd6c..cac321073 100644 --- a/src/pptx/package.py +++ b/src/pptx/package.py @@ -8,6 +8,7 @@ from pptx.opc.package import OpcPackage from pptx.opc.packuri import PackURI from pptx.parts.coreprops import CorePropertiesPart +from pptx.parts.custom_properties import CustomPropertiesPart from pptx.parts.image import Image, ImagePart from pptx.parts.media import MediaPart from pptx.util import lazyproperty @@ -29,6 +30,35 @@ def core_properties(self) -> CorePropertiesPart: self.relate_to(core_props, RT.CORE_PROPERTIES) return core_props + @lazyproperty + def custom_properties_part(self) -> CustomPropertiesPart: + """The Custom Document Properties part for this package. + + Creates an empty `/docProps/custom.xml` if no such part is present + (mirrors :attr:`core_properties` behavior). The relationship is rooted + at the package — Office writes it as a sibling of `core.xml`. + """ + try: + return self.part_related_by(RT.CUSTOM_PROPERTIES) + except KeyError: + cp_part = CustomPropertiesPart.default(self) + self.relate_to(cp_part, RT.CUSTOM_PROPERTIES) + return cp_part + + @property + def custom_properties(self): + """Mapping-protocol view over the Custom Document Properties part. + + Returns a :class:`pptx.custom_properties.CustomProperties` instance + wrapping the package's `CustomPropertiesPart`. The same wrapper + instance is reused across calls (it's a thin facade with no state). + """ + # Local import — `pptx.custom_properties` and `pptx.package` would + # otherwise form a cycle through the parts layer. 
+ from pptx.custom_properties import CustomProperties + + return CustomProperties(self.custom_properties_part) + def get_or_add_image_part(self, image_file: str | IO[bytes]): """ Return an |ImagePart| object containing the image in *image_file*. If diff --git a/src/pptx/parts/custom_properties.py b/src/pptx/parts/custom_properties.py new file mode 100644 index 000000000..efe55319a --- /dev/null +++ b/src/pptx/parts/custom_properties.py @@ -0,0 +1,66 @@ +"""Custom Document Properties part — `/docProps/custom.xml`.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Iterator + +from pptx.opc.constants import CONTENT_TYPE as CT +from pptx.opc.package import XmlPart +from pptx.opc.packuri import PackURI +from pptx.oxml.custom_properties import CT_Properties, CT_Property + +if TYPE_CHECKING: + from pptx.package import Package + + +class CustomPropertiesPart(XmlPart): + """Corresponds to part named `/docProps/custom.xml`. + + Holds the package's custom (user-defined) document properties — the values + surfaced under `File → Properties → Advanced` in PowerPoint. The + user-facing Mapping wrapper lives at `pptx.custom_properties.CustomProperties` + (Phase 3); this part just owns the XML and the per-property delegators. + """ + + _element: CT_Properties + + @classmethod + def default(cls, package: "Package") -> "CustomPropertiesPart": + """Return a new empty `CustomPropertiesPart` ready to add to `package`. + + Useful as the seed when a presentation doesn't yet have a custom + properties part. The returned instance has no properties on it; the + caller adds entries via `add_property(...)`. 
+ """ + return cls( + PackURI("/docProps/custom.xml"), + CT.OFC_CUSTOM_PROPERTIES, + package, + CT_Properties.new_properties(), + ) + + def add_property(self, name: str, value: object) -> CT_Property: + """Add a new `` for `(name, value)` and return it.""" + return self._element.add_property(name, value) + + def get_property(self, name: str) -> CT_Property | None: + """Return the `` with `name` or `None` if absent.""" + return self._element.get_property(name) + + def remove_property(self, name: str) -> bool: + """Remove the `` with `name`, returning True if found.""" + return self._element.remove_property(name) + + @property + def property_names(self) -> tuple[str, ...]: + """Tuple of property names in document order.""" + return self._element.property_names + + def __contains__(self, name: object) -> bool: + return isinstance(name, str) and self._element.get_property(name) is not None + + def __iter__(self) -> Iterator[str]: + return iter(self._element.property_names) + + def __len__(self) -> int: + return len(self._element.property_lst) diff --git a/src/pptx/parts/custom_xml.py b/src/pptx/parts/custom_xml.py new file mode 100644 index 000000000..1bf0e42d4 --- /dev/null +++ b/src/pptx/parts/custom_xml.py @@ -0,0 +1,273 @@ +"""customXml data parts and their itemProps siblings. + +Two part subclasses living together in this module because they are an atomic +pair — a `CustomXmlPart` is meaningless without its `CustomXmlPropertiesPart` +sibling, and vice versa. Both are created by `CustomXmlPart.new_pair(...)`. + +Schema references: ECMA-376 Part 1, §15.2.4 (Custom XML Data Storage Part). 
+""" + +from __future__ import annotations + +import uuid +from typing import TYPE_CHECKING, Iterable, Union, cast + +from lxml.etree import _Element # pyright: ignore[reportPrivateUsage] + +from pptx.opc.constants import CONTENT_TYPE as CT +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from pptx.opc.package import XmlPart +from pptx.opc.packuri import PackURI +from pptx.oxml import parse_xml +from pptx.oxml.custom_xml import CT_DatastoreItem +from pptx.oxml.xmlchemy import BaseOxmlElement + +if TYPE_CHECKING: + from pptx.package import Package + + +XmlPayload = Union[bytes, str, _Element] + + +class CustomXmlPropertiesPart(XmlPart): + """Corresponds to part named `/customXml/itemPropsN.xml`. + + Carries the `datastoreItem` GUID identifying its sibling `CustomXmlPart` + across edits, plus the optional list of `` URIs the data part + claims to conform to. + """ + + _element: CT_DatastoreItem + + @classmethod + def new( + cls, + package: "Package", + partname: PackURI, + datastore_item_id: str, + schema_refs: Iterable[str] = (), + ) -> "CustomXmlPropertiesPart": + """Return a fresh `CustomXmlPropertiesPart` at `partname` for `package`.""" + item_elm = CT_DatastoreItem.new(datastore_item_id, schema_refs=schema_refs) + return cls(partname, CT.OFC_CUSTOM_XML_PROPERTIES, package, item_elm) + + @property + def datastore_item_id(self) -> str: + """The `ds:itemID` attribute — a GUID like `"{1A2B...}"`.""" + return self._element.itemID + + @datastore_item_id.setter + def datastore_item_id(self, value: str) -> None: + self._element.itemID = value + + @property + def schema_refs(self) -> tuple[str, ...]: + """Tuple of `` values in document order.""" + return self._element.schema_ref_uris + + def add_schema_ref(self, uri: str) -> None: + """Append a `` (idempotent on `uri`).""" + self._element.add_schema_ref(uri) + + def remove_schema_ref(self, uri: str) -> bool: + """Remove the schemaRef with `uri`, returning True if found.""" + return 
self._element.remove_schema_ref(uri) + + +class CustomXmlPart(XmlPart): + """Corresponds to part named `/customXml/itemN.xml`. + + Holds an arbitrary XML payload supplied by the caller. The payload's root + element name and namespaces are caller-defined — `python-pptx` does not + impose a schema. Each `CustomXmlPart` has a sibling `CustomXmlPropertiesPart` + that carries the part's `datastoreItem` GUID; the rel between them is of + type `RT.CUSTOM_XML_PROPS`. + + NOTE: This class is intentionally **not** registered with `PartFactory` + against `CT.XML`. Loaded `application/xml` parts are produced as base + `Part` instances, and the Phase-3 `CustomXmlParts` collection upgrades + them on enumeration. See `Plans/customxml-implementation-plan.md` §3.6. + """ + + @classmethod + def new_pair( + cls, + package: "Package", + xml_payload: XmlPayload, + *, + datastore_item_id: str | None = None, + schema_refs: Iterable[str] = (), + ) -> "CustomXmlPart": + """Create a paired CustomXmlPart + CustomXmlPropertiesPart in `package`. + + Returns the data part. The props part is related from the data part + via `RT.CUSTOM_XML_PROPS`. Neither is yet related from any outside + source — that is the caller's job (Phase-3 `CustomXmlParts.add(...)`). + + `xml_payload` may be `bytes`, a `str`, or an existing lxml `_Element`. + If `datastore_item_id` is omitted a fresh `uuid4()` is generated and + wrapped in curly braces to match Office's format. + + Partname allocation: `/customXml/itemN.xml` and `/customXml/itemPropsN.xml` + share the same `N`, picked as the next free index across existing data + parts in `package` (props parts are looked up via the data → props rel, + not via partname pattern). 
+ """ + idx = _next_customxml_index(package) + data_partname = PackURI("/customXml/item%d.xml" % idx) + props_partname = PackURI("/customXml/itemProps%d.xml" % idx) + + element = _parse_payload(xml_payload) + data_part = cls(data_partname, CT.XML, package, element) + + if datastore_item_id is None: + datastore_item_id = "{%s}" % str(uuid.uuid4()).upper() + + props_part = CustomXmlPropertiesPart.new( + package, props_partname, datastore_item_id, schema_refs + ) + + data_part.relate_to(props_part, RT.CUSTOM_XML_PROPS) + return data_part + + @property + def props_part(self) -> CustomXmlPropertiesPart: + """Return the related `CustomXmlPropertiesPart` for this data part. + + Raises `KeyError` if the props rel is missing — a malformed package + the caller is expected to repair via `CustomXmlPart.new_pair(...)`. + """ + return cast( + CustomXmlPropertiesPart, self.part_related_by(RT.CUSTOM_XML_PROPS) + ) + + @property + def datastore_item_id(self) -> str: + """Convenience accessor delegating to the sibling props part.""" + return self.props_part.datastore_item_id + + @datastore_item_id.setter + def datastore_item_id(self, value: str) -> None: + self.props_part.datastore_item_id = value + + @property + def schema_refs(self) -> tuple[str, ...]: + """Convenience accessor delegating to the sibling props part.""" + return self.props_part.schema_refs + + def add_schema_ref(self, uri: str) -> None: + """Convenience pass-through to the sibling props part.""" + self.props_part.add_schema_ref(uri) + + def remove_schema_ref(self, uri: str) -> bool: + """Convenience pass-through to the sibling props part.""" + return self.props_part.remove_schema_ref(uri) + + @property + def element(self) -> BaseOxmlElement: + """Live root element of the customXml payload. + + Mutating its children mutates the part; the next `package.save(...)` + will serialize the updated tree. 
+ """ + return self._element + + def replace_xml(self, xml_payload: XmlPayload) -> None: + """Replace the entire XML payload with `xml_payload`. + + The root element is replaced wholesale; `datastore_item_id` and + `schema_refs` are unaffected (they live on the sibling props part). + """ + self._element = _parse_payload(xml_payload) + + @property + def name(self) -> str | None: + """The application-assigned name for this part, or `None`. + + Names are stored as reserved entries in `/docProps/custom.xml` keyed + by `datastore_item_id`. See `Plans/customxml-implementation-plan.md` + §3.4 for the rationale (Q3 default). + """ + # Local import to avoid `parts → custom_xml → parts` cycle. + from pptx.custom_xml import NAME_PROPERTY_PREFIX + + try: + cp_part = self.package.custom_properties_part + except Exception: # pragma: no cover — package without custom_properties_part hook + return None + prop = cp_part.get_property(NAME_PROPERTY_PREFIX + self.datastore_item_id) + if prop is None: + return None + value = prop.value + return value if isinstance(value, str) else None + + def add_item( + self, tag: str, text: str = "", **attrs: str + ) -> BaseOxmlElement: + """Append a child element `text` with `attrs`. + + Convenience for the common "flat list of items" customXml shape; for + arbitrary structure mutate :attr:`element` directly. The `tag` is + used verbatim — pass a fully-namespaced Clark name if the parent + root uses a default namespace and you need to escape it explicitly, + otherwise lxml will attach the new element to the parent's namespace. + + Returns the newly appended element so the caller can chain further + edits on it. + """ + from lxml import etree + + new = etree.SubElement(self._element, tag) + if text: + new.text = text + for k, v in attrs.items(): + new.set(k, v) + return cast(BaseOxmlElement, new) + + +def _parse_payload(xml_payload: XmlPayload) -> BaseOxmlElement: + """Coerce `xml_payload` to a `BaseOxmlElement` root. 
def _next_customxml_index(package: "Package") -> int:
    """Return the next free `N` for `/customXml/itemN.xml`.

    Walks `package.iter_parts()` and collects `N` from every partname of the
    exact form `/customXml/item<digits>.xml`. Anything else — including
    `itemProps*.xml` siblings and names whose stem is not purely ASCII digits —
    is ignored. Gaps in the sequence are reused (e.g. if items 1 and 3 exist,
    returns 2); numbering starts at 1.
    """
    prefix = "/customXml/item"
    extension = ".xml"

    used: set[int] = set()
    for part in package.iter_parts():
        partname = str(part.partname)
        if not (partname.startswith(prefix) and partname.endswith(extension)):
            continue
        stem = partname[len(prefix) : -len(extension)]
        # -- Require plain digits rather than relying on `int()`, which also
        # -- accepts signs, surrounding whitespace and "_" separators.
        if stem.isascii() and stem.isdigit():
            used.add(int(stem))

    n = 1
    while n in used:
        n += 1
    return n
@lazyproperty + def custom_properties(self) -> CustomProperties: + """Mapping-protocol view over the Custom Document Properties part. + + Lazy — the same wrapper instance is returned across calls. The + underlying `/docProps/custom.xml` part is created on first access if + the package does not already have one. + """ + from pptx.custom_properties import CustomProperties + + return CustomProperties(self.package.custom_properties_part) + + @lazyproperty + def custom_xml_parts(self) -> CustomXmlParts: + """Sequence-like collection of customXml data parts in this package. + + Walks both presentation-scoped (`ppt/_rels/presentation.xml.rels`) and + package-scoped (`/_rels/.rels`) `RT.CUSTOM_XML` relationships. The + same collection instance is reused across calls. + """ + from pptx.custom_xml import CustomXmlParts + + return CustomXmlParts(self) + def get_slide(self, slide_id: int) -> Slide | None: """Return optional related |Slide| object identified by `slide_id`. diff --git a/src/pptx/presentation.py b/src/pptx/presentation.py index a41bfd59a..94fa3fcb5 100644 --- a/src/pptx/presentation.py +++ b/src/pptx/presentation.py @@ -9,6 +9,8 @@ from pptx.util import lazyproperty if TYPE_CHECKING: + from pptx.custom_properties import CustomProperties + from pptx.custom_xml import CustomXmlParts from pptx.oxml.presentation import CT_Presentation, CT_SlideId from pptx.parts.presentation import PresentationPart from pptx.slide import NotesMaster, SlideLayouts @@ -33,6 +35,27 @@ def core_properties(self): """ return self.part.core_properties + @property + def custom_properties(self) -> CustomProperties: + """Mapping-protocol view over the Custom Document Properties part. + + These are the user-defined properties surfaced under + `File → Properties → Advanced` in PowerPoint. Created on first access + if the package does not already have a custom properties part. 
+ """ + return self.part.custom_properties + + @property + def custom_xml_parts(self) -> CustomXmlParts: + """Collection of customXml data parts in this presentation's package. + + Walks both presentation-scoped and package-scoped `RT.CUSTOM_XML` + relationships. Use `.add(...)` to attach a new part, `[i]` or + `["item3.xml"]` to look one up by index or partname tail, and + `.by_guid(...)` / `.by_name(...)` for the other lookup forms. + """ + return self.part.custom_xml_parts + @property def notes_master(self) -> NotesMaster: """Instance of |NotesMaster| for this presentation. diff --git a/src/pptx/shapes/autoshape.py b/src/pptx/shapes/autoshape.py index c7f8cd93e..e884237ff 100644 --- a/src/pptx/shapes/autoshape.py +++ b/src/pptx/shapes/autoshape.py @@ -136,7 +136,7 @@ def _update_adjustments_with_actuals( `guides` is a list of `a:gd` elements. Guides with a name that does not match an adjustment object are skipped. """ - adjustments_by_name = dict((adj.name, adj) for adj in adjustments) + adjustments_by_name = {adj.name: adj for adj in adjustments} for gd in guides: name = gd.name actual = int(gd.fmla[4:]) diff --git a/src/pptx/shapes/shapetree.py b/src/pptx/shapes/shapetree.py index 29623f1f5..42c32de83 100644 --- a/src/pptx/shapes/shapetree.py +++ b/src/pptx/shapes/shapetree.py @@ -792,7 +792,7 @@ def __getitem__(self, idx: int): def __iter__(self): """Generate placeholder shapes in `idx` order.""" - ph_elms = sorted([e for e in self._element.iter_ph_elms()], key=lambda e: e.ph_idx) + ph_elms = sorted(self._element.iter_ph_elms(), key=lambda e: e.ph_idx) return (SlideShapeFactory(e, self) for e in ph_elms) def __len__(self) -> int: diff --git a/src/pptx/text/fonts.py b/src/pptx/text/fonts.py index 5ae054a83..9be19ccd1 100644 --- a/src/pptx/text/fonts.py +++ b/src/pptx/text/fonts.py @@ -175,10 +175,10 @@ def _tables(self): A mapping of OpenType table tag, e.g. 'name', to a table object providing access to the contents of that table. 
""" - return dict( - (tag, _TableFactory(tag, self._stream, off, len_)) + return { + tag: _TableFactory(tag, self._stream, off, len_) for tag, off, len_ in self._iter_table_records() - ) + } @property def _table_count(self): diff --git a/tests/chart/test_category.py b/tests/chart/test_category.py index 9319d664b..2779d3696 100644 --- a/tests/chart/test_category.py +++ b/tests/chart/test_category.py @@ -30,7 +30,7 @@ def it_can_iterate_over_the_categories_it_contains(self, iter_fixture): Category_, calls, ) = iter_fixture - assert [c for c in categories] == expected_categories + assert list(categories) == expected_categories assert Category_.call_args_list == calls def it_knows_its_depth(self, depth_fixture): diff --git a/tests/chart/test_xmlwriter.py b/tests/chart/test_xmlwriter.py index bb7354983..7db641719 100644 --- a/tests/chart/test_xmlwriter.py +++ b/tests/chart/test_xmlwriter.py @@ -172,8 +172,8 @@ class Describe_BarChartXmlWriter(object): """Unit-test suite for `pptx.chart.xmlwriter._BarChartXmlWriter`.""" @pytest.mark.parametrize( - "member, cat_count, ser_count, cat_type, snippet_name", - ( + ("member", "cat_count", "ser_count", "cat_type", "snippet_name"), + [ ("BAR_CLUSTERED", 2, 2, str, "2x2-bar-clustered"), ("BAR_CLUSTERED", 2, 2, date, "2x2-bar-clustered-date"), ("BAR_CLUSTERED", 2, 2, float, "2x2-bar-clustered-float"), @@ -182,7 +182,7 @@ class Describe_BarChartXmlWriter(object): ("COLUMN_CLUSTERED", 2, 2, str, "2x2-column-clustered"), ("COLUMN_STACKED", 2, 2, str, "2x2-column-stacked"), ("COLUMN_STACKED_100", 2, 2, str, "2x2-column-stacked-100"), - ), + ], ) def it_can_generate_xml_for_bar_type_charts( self, member, cat_count, ser_count, cat_type, snippet_name @@ -284,11 +284,11 @@ class Describe_PieChartXmlWriter(object): """Unit-test suite for `pptx.chart.xmlwriter._PieChartXmlWriter`.""" @pytest.mark.parametrize( - "enum_member, cat_count, ser_count, snippet_name", - ( + ("enum_member", "cat_count", "ser_count", "snippet_name"), + [ ("PIE", 3, 
1, "3x1-pie"), ("PIE_EXPLODED", 3, 1, "3x1-pie-exploded"), - ), + ], ) def it_can_generate_xml_for_a_pie_chart(self, enum_member, cat_count, ser_count, snippet_name): chart_type = getattr(XL_CHART_TYPE, enum_member) diff --git a/tests/dml/test_effect.py b/tests/dml/test_effect.py index be08fc19c..d5207aeff 100644 --- a/tests/dml/test_effect.py +++ b/tests/dml/test_effect.py @@ -6,7 +6,7 @@ from pptx.dml.color import ColorFormat from pptx.dml.effect import ShadowFormat -from pptx.util import Emu, Pt +from pptx.util import Pt from ..unitutil.cxml import element, xml diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration/test_customxml_roundtrip.py b/tests/integration/test_customxml_roundtrip.py new file mode 100644 index 000000000..ab20e3e35 --- /dev/null +++ b/tests/integration/test_customxml_roundtrip.py @@ -0,0 +1,201 @@ +# pyright: reportPrivateUsage=false + +"""Integration test suite for customXml round-trip. + +Loads each synthetic fixture under ``tests/test_files/customxml/``, exercises +the public API against it, saves to a fresh BytesIO, reloads, and asserts the +state survived. + +Real third-party fixtures (SharePoint-saved, Office.js-produced, VSTO-tooled) +will land later under ``sharepoint-saved.pptx`` etc. once captured during the +manual PowerPoint UI matrix in ``Plans/customxml-implementation-plan.md`` §5.4. 
+""" + +from __future__ import annotations + +import os +from io import BytesIO + +from pptx import Presentation +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from pptx.parts.custom_xml import CustomXmlPart + +_FIXTURE_DIR = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + os.pardir, + "test_files", + "customxml", +) + + +def _fixture(name: str) -> str: + return os.path.join(_FIXTURE_DIR, name) + + +def _roundtrip(prs): + buf = BytesIO() + prs.save(buf) + buf.seek(0) + return Presentation(buf) + + +class DescribePresentationScopedFixture: + def it_loads_the_part(self): + prs = Presentation(_fixture("presentation-scoped.pptx")) + assert len(prs.custom_xml_parts) == 1 + + def it_upgrades_loaded_part_to_CustomXmlPart_class(self): + prs = Presentation(_fixture("presentation-scoped.pptx")) + part = prs.custom_xml_parts[0] + assert isinstance(part, CustomXmlPart) + + def it_preserves_the_payload(self): + prs = Presentation(_fixture("presentation-scoped.pptx")) + part = prs.custom_xml_parts.by_name("provenance") + assert part is not None + assert part.element.tag == "{urn:my:provenance}provenance" + source = part.element.find("{urn:my:provenance}source") + assert source is not None + assert source.text == "integration-fixture" + + def it_preserves_the_pinned_guid(self): + prs = Presentation(_fixture("presentation-scoped.pptx")) + part = prs.custom_xml_parts[0] + assert part.datastore_item_id == "{1A2B3C4D-5E6F-7890-ABCD-EF1234567890}" + + def it_preserves_the_schema_refs(self): + prs = Presentation(_fixture("presentation-scoped.pptx")) + part = prs.custom_xml_parts[0] + assert part.schema_refs == ("urn:my:provenance",) + + def it_preserves_the_presentation_scope_through_save(self): + prs = Presentation(_fixture("presentation-scoped.pptx")) + reloaded = _roundtrip(prs) + prs_rel_types = {r.reltype for r in reloaded.part.rels.values()} + pkg_rel_types = {r.reltype for r in reloaded.part.package._rels.values()} + assert RT.CUSTOM_XML in prs_rel_types 
+ assert RT.CUSTOM_XML not in pkg_rel_types + + +class DescribePackageScopedFixture: + def it_loads_the_part(self): + prs = Presentation(_fixture("package-scoped.pptx")) + assert len(prs.custom_xml_parts) == 1 + + def it_preserves_the_payload(self): + prs = Presentation(_fixture("package-scoped.pptx")) + part = prs.custom_xml_parts.by_name("vsto") + assert part is not None + assert part.element.tag == "{urn:my:vsto}vsto-config" + + def it_preserves_the_package_scope_through_save(self): + prs = Presentation(_fixture("package-scoped.pptx")) + reloaded = _roundtrip(prs) + prs_rel_types = {r.reltype for r in reloaded.part.rels.values()} + pkg_rel_types = {r.reltype for r in reloaded.part.package._rels.values()} + assert RT.CUSTOM_XML in pkg_rel_types + assert RT.CUSTOM_XML not in prs_rel_types + + def it_preserves_the_pinned_guid(self): + prs = Presentation(_fixture("package-scoped.pptx")) + part = prs.custom_xml_parts[0] + assert part.datastore_item_id == "{ABCDEF12-3456-7890-ABCD-EF1234567890}" + + +class DescribeMultipartFixture: + def it_loads_two_customxml_parts_at_mixed_scopes(self): + prs = Presentation(_fixture("multipart.pptx")) + assert len(prs.custom_xml_parts) == 2 + 1 # provenance + extra + readme blob + + def it_preserves_custom_document_properties(self): + prs = Presentation(_fixture("multipart.pptx")) + assert prs.custom_properties["Source"] == "deck-builder-cli@1.4.2" + assert prs.custom_properties["BuildNumber"] == 42 + assert prs.custom_properties["IsDraft"] is True + + def it_finds_each_part_by_name(self): + prs = Presentation(_fixture("multipart.pptx")) + assert prs.custom_xml_parts.by_name("provenance") is not None + assert prs.custom_xml_parts.by_name("extra") is not None + assert prs.custom_xml_parts.by_name("readme") is not None + + def it_round_trips_through_save_load_with_mutations(self): + prs = Presentation(_fixture("multipart.pptx")) + # mutate something in each layer + prs.custom_properties["NewKey"] = "added" + 
prs.custom_xml_parts.by_name("provenance").add_item( + "added-by-test", "value" + ) + + reloaded = _roundtrip(prs) + + assert reloaded.custom_properties["NewKey"] == "added" + assert reloaded.custom_properties["Source"] == "deck-builder-cli@1.4.2" + prov = reloaded.custom_xml_parts.by_name("provenance") + assert prov is not None + # The added child element survived the round-trip + added = [c for c in prov.element if c.tag.endswith("added-by-test")] + assert len(added) == 1 + assert added[0].text == "value" + + def it_round_trips_the_string_blob_helper(self): + prs = Presentation(_fixture("multipart.pptx")) + content = prs.custom_xml_parts.read_string_blob("readme") + assert content is not None + assert "# Hello" in content + assert "markdown content" in content + + def it_remove_then_save_drops_the_part(self): + prs = Presentation(_fixture("multipart.pptx")) + provenance = prs.custom_xml_parts.by_name("provenance") + prs.custom_xml_parts.remove(provenance) + reloaded = _roundtrip(prs) + assert reloaded.custom_xml_parts.by_name("provenance") is None + # Other parts still present + assert reloaded.custom_xml_parts.by_name("extra") is not None + assert reloaded.custom_xml_parts.by_name("readme") is not None + + +class DescribeCleanFixture: + """A presentation with no customXml at all should have no related rels.""" + + def it_has_no_customxml_parts(self): + prs = Presentation(_fixture("clean.pptx")) + assert len(prs.custom_xml_parts) == 0 + + def it_round_trips_with_no_rels_added(self): + prs = Presentation(_fixture("clean.pptx")) + # do nothing + reloaded = _roundtrip(prs) + prs_rel_types = {r.reltype for r in reloaded.part.rels.values()} + pkg_rel_types = {r.reltype for r in reloaded.part.package._rels.values()} + assert RT.CUSTOM_XML not in prs_rel_types + assert RT.CUSTOM_XML not in pkg_rel_types + assert RT.CUSTOM_PROPERTIES not in pkg_rel_types + + def it_can_have_customxml_added_after_loading(self): + prs = Presentation(_fixture("clean.pptx")) + 
prs.custom_xml_parts.add( + b'', + name="after-load", + ) + reloaded = _roundtrip(prs) + part = reloaded.custom_xml_parts.by_name("after-load") + assert part is not None + assert part.element.tag == "{u:al}after-load" + + +class DescribeCoreAndCustomCoexistence: + def it_preserves_core_properties_alongside_custom_ones(self): + prs = Presentation(_fixture("multipart.pptx")) + prs.core_properties.author = "Athena" + prs.core_properties.subject = "Integration test" + + reloaded = _roundtrip(prs) + + assert reloaded.core_properties.author == "Athena" + assert reloaded.core_properties.subject == "Integration test" + # custom properties still intact + assert reloaded.custom_properties["Source"] == "deck-builder-cli@1.4.2" + # customXml parts still intact + assert len(reloaded.custom_xml_parts) == 3 diff --git a/tests/opc/test_package.py b/tests/opc/test_package.py index 8c0e95809..c0671bc00 100644 --- a/tests/opc/test_package.py +++ b/tests/opc/test_package.py @@ -224,7 +224,7 @@ def it_provides_access_to_the_main_document_part(self, request): assert presentation_part is presentation_part_ @pytest.mark.parametrize( - "ns, expected_n", (((), 1), ((1,), 2), ((1, 2), 3), ((2, 4), 3), ((1, 4), 3)) + ("ns", "expected_n"), [((), 1), ((1,), 2), ((1, 2), 3), ((2, 4), 3), ((1, 4), 3)] ) def it_can_find_the_next_available_partname(self, request, ns, expected_n): tmpl = "/x%d.xml" @@ -569,13 +569,13 @@ def it_can_construct_from_content_types_xml(self, request): ) @pytest.mark.parametrize( - "partname, expected_value", - ( + ("partname", "expected_value"), + [ ("/docProps/core.xml", CT.OPC_CORE_PROPERTIES), ("/ppt/presentation.xml", CT.PML_PRESENTATION_MAIN), ("/PPT/Presentation.XML", CT.PML_PRESENTATION_MAIN), ("/ppt/viewprops.xml", CT.PML_VIEW_PROPS), - ), + ], ) def it_matches_an_override_on_case_insensitive_partname( self, content_type_map, partname, expected_value @@ -583,12 +583,12 @@ def it_matches_an_override_on_case_insensitive_partname( assert 
content_type_map[PackURI(partname)] == expected_value @pytest.mark.parametrize( - "partname, expected_value", - ( + ("partname", "expected_value"), + [ ("/foo/bar.xml", CT.XML), ("/FOO/BAR.Rels", CT.OPC_RELATIONSHIPS), ("/foo/bar.jpeg", CT.JPEG), - ), + ], ) def it_falls_back_to_case_insensitive_extension_default_match( self, content_type_map, partname, expected_value @@ -617,7 +617,7 @@ def content_type_map(self): class Describe_Relationships: """Unit-test suite for `pptx.opc.package._Relationships` objects.""" - @pytest.mark.parametrize("rId, expected_value", (("rId1", True), ("rId2", False))) + @pytest.mark.parametrize(("rId", "expected_value"), [("rId1", True), ("rId2", False)]) def it_knows_whether_it_contains_a_relationship_with_rId( self, _rels_prop_, rId, expected_value ): @@ -635,7 +635,7 @@ def but_it_raises_KeyError_when_no_relationship_has_rId(self, _rels_prop_): assert str(e.value) == "\"no relationship with key 'rId6'\"" def it_can_iterate_the_rIds_of_the_relationships_it_contains(self, request, _rels_prop_): - rels_ = set(instance_mock(request, _Relationship) for n in range(5)) + rels_ = {instance_mock(request, _Relationship) for n in range(5)} _rels_prop_.return_value = {"rId%d" % (i + 1): r for i, r in enumerate(rels_)} relationships = _Relationships(None) @@ -827,14 +827,14 @@ def and_it_can_add_an_external_relationship_to_help( assert rId == "rId9" @pytest.mark.parametrize( - "target_ref, is_external, expected_value", - ( + ("target_ref", "is_external", "expected_value"), + [ ("http://url", True, "rId1"), ("part_1", False, "rId2"), ("http://foo", True, "rId3"), ("part_2", False, "rId4"), ("http://bar", True, None), - ), + ], ) def it_can_get_a_matching_relationship_to_help( self, request, _rels_by_reltype_prop_, target_ref, is_external, expected_value @@ -872,18 +872,18 @@ def but_it_returns_None_when_there_is_no_matching_relationship(self, _rels_by_re assert relationships._get_matching(RT.HYPERLINK, "http://url", True) is None 
@pytest.mark.parametrize( - "rIds, expected_value", - ( + ("rIds", "expected_value"), + [ ((), "rId1"), (("rId1",), "rId2"), (("rId1", "rId2"), "rId3"), (("rId1", "rId4"), "rId3"), (("rId1", "rId4", "rId6"), "rId3"), (("rId1", "rId2", "rId6"), "rId4"), - ), + ], ) def it_finds_the_next_rId_to_help(self, _rels_prop_, rIds, expected_value): - _rels_prop_.return_value = {rId: None for rId in rIds} + _rels_prop_.return_value = dict.fromkeys(rIds) relationships = _Relationships(None) assert relationships._next_rId == expected_value @@ -960,8 +960,8 @@ def it_can_construct_from_xml(self, request, part_): assert isinstance(relationship, _Relationship) @pytest.mark.parametrize( - "target_mode, expected_value", - ((RTM.INTERNAL, False), (RTM.EXTERNAL, True), (None, False)), + ("target_mode", "expected_value"), + [(RTM.INTERNAL, False), (RTM.EXTERNAL, True), (None, False)], ) def it_knows_whether_it_is_external(self, target_mode, expected_value): relationship = _Relationship(None, None, None, target_mode, None) diff --git a/tests/oxml/shapes/test_picture.py b/tests/oxml/shapes/test_picture.py index 546d6b0fd..f01b5eea1 100644 --- a/tests/oxml/shapes/test_picture.py +++ b/tests/oxml/shapes/test_picture.py @@ -12,13 +12,13 @@ class DescribeCT_Picture(object): """Unit-test suite for `pptx.oxml.shapes.picture.CT_Picture` objects.""" @pytest.mark.parametrize( - "desc, xml_desc", - ( + ("desc", "xml_desc"), + [ ("kittens.jpg", "kittens.jpg"), ("bits&bobs.png", "bits&bobs.png"), ("img&.png", "img&.png"), ("ime.png", "im<ag>e.png"), - ), + ], ) def it_can_create_a_new_pic_element(self, desc, xml_desc): """`desc` attr (often filename) is XML-escaped to handle special characters. 
diff --git a/tests/oxml/test_custom_properties.py b/tests/oxml/test_custom_properties.py new file mode 100644 index 000000000..c98476a00 --- /dev/null +++ b/tests/oxml/test_custom_properties.py @@ -0,0 +1,272 @@ +# pyright: reportPrivateUsage=false + +"""Unit-test suite for `pptx.oxml.custom_properties`.""" + +from __future__ import annotations + +import datetime as dt + +import pytest +from lxml import etree + +from pptx.oxml import parse_xml +from pptx.oxml.custom_properties import ( + DEFAULT_FMTID, + CT_Properties, + CT_Property, + CT_VtBool, + CT_VtFiletime, + CT_VtI4, + CT_VtLpwstr, + CT_VtR8, +) +from pptx.oxml.ns import nsdecls + + +def _props_xml(*property_xml_chunks: str) -> bytes: + body = "".join(property_xml_chunks) + return ("%s" % (nsdecls("op", "vt"), body)).encode() + + +def _property_xml(name: str, pid: int, vt_inner_xml: str) -> str: + return ( + '%s' + % (DEFAULT_FMTID, pid, name, vt_inner_xml) + ) + + +class DescribeCT_Properties: + def it_parses_to_the_registered_class(self): + root = parse_xml(_props_xml()) + assert isinstance(root, CT_Properties) + + def it_can_create_a_fresh_root_with_both_namespaces_declared(self): + root = CT_Properties.new_properties() + xml = etree.tostring(root, encoding="unicode") + assert "xmlns:op=" in xml + assert "xmlns:vt=" in xml + + def it_returns_the_property_lst_in_document_order(self): + root = parse_xml( + _props_xml( + _property_xml("alpha", 2, "a"), + _property_xml("beta", 3, "1"), + ) + ) + assert root.property_names == ("alpha", "beta") + + def it_finds_a_property_by_name(self): + root = parse_xml( + _props_xml( + _property_xml("Source", 2, "cli"), + _property_xml("Build", 3, "42"), + ) + ) + assert root.get_property("Build").pid == 3 + assert root.get_property("Missing") is None + + def it_removes_a_property_by_name(self): + root = parse_xml( + _props_xml( + _property_xml("Source", 2, "cli"), + _property_xml("Build", 3, "42"), + ) + ) + assert root.remove_property("Source") is True + assert 
root.property_names == ("Build",) + assert root.remove_property("Source") is False # idempotent + + @pytest.mark.parametrize( + ("value", "expected_child_tag"), + [ + ("hello", "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}lpwstr"), + (42, "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}i4"), + (3.14, "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}r8"), + (True, "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}bool"), + ( + dt.datetime(2026, 5, 5, 14, 0, 0), + "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}filetime", + ), + ], + ) + def it_dispatches_add_property_by_python_type(self, value, expected_child_tag): + root = CT_Properties.new_properties() + prop = root.add_property("X", value) + assert prop.fmtid == DEFAULT_FMTID + assert prop.name == "X" + # exactly one vt:* child, of the expected tag + assert len(prop) == 1 + assert prop[0].tag == expected_child_tag + + def it_round_trips_value_for_each_vt_type(self): + root = CT_Properties.new_properties() + cases: list[tuple[str, object]] = [ + ("Source", "deck-builder-cli@1.4.2"), + ("BuildNumber", 42), + ("WeightedScore", 3.14159), + ("IsDraft", True), + ("IsFinal", False), + ("GeneratedAt", dt.datetime(2026, 5, 5, 14, 0, 0)), + ] + for name, value in cases: + root.add_property(name, value) + + serialized = etree.tostring(root) + reparsed = parse_xml(serialized) + for name, value in cases: + prop = reparsed.get_property(name) + assert prop is not None, name + assert prop.value == value, name + + def it_auto_assigns_unique_pids_starting_at_2(self): + root = CT_Properties.new_properties() + a = root.add_property("a", "1") + b = root.add_property("b", "2") + c = root.add_property("c", "3") + assert (a.pid, b.pid, c.pid) == (2, 3, 4) + + def it_skips_used_pids_when_assigning(self): + # parse a doc where pid 2 is already used; the next add_property must use 3 + root = parse_xml( + 
_props_xml(_property_xml("Existing", 2, "x")) + ) + new_prop = root.add_property("New", "y") + assert new_prop.pid == 3 + + def it_raises_TypeError_on_unsupported_value_type(self): + root = CT_Properties.new_properties() + with pytest.raises(TypeError): + root.add_property("bad", object()) + + def it_treats_bool_as_bool_not_int(self): + # bool is-a int in Python; the dispatch must still produce vt:bool, not vt:i4 + root = CT_Properties.new_properties() + prop_true = root.add_property("flag", True) + assert isinstance(prop_true.bool_, CT_VtBool) + assert prop_true.i4 is None + + +class DescribeCT_VtLpwstr: + def it_round_trips_string_text(self): + prop = parse_xml( + _property_xml("X", 2, "hello world").encode() + if False + else ( + "" + "hello world" + % (nsdecls("op", "vt"), DEFAULT_FMTID) + ).encode() + ) + assert isinstance(prop.lpwstr, CT_VtLpwstr) + assert prop.value == "hello world" + + def it_rejects_non_string_assignment(self): + prop = CT_Properties.new_properties().add_property("X", "seed") + prop_lpwstr: CT_VtLpwstr = prop.lpwstr + with pytest.raises(TypeError): + prop_lpwstr.value_typed = 42 # type: ignore[assignment] + + def it_rejects_overlong_strings(self): + prop = CT_Properties.new_properties().add_property("X", "seed") + with pytest.raises(ValueError): + prop.lpwstr.value_typed = "x" * 256 + + +class DescribeCT_VtI4: + @pytest.mark.parametrize("value", [-2147483648, -1, 0, 1, 42, 2147483647]) + def it_round_trips_int_text(self, value): + prop = CT_Properties.new_properties().add_property("X", value) + assert isinstance(prop.i4, CT_VtI4) + assert prop.value == value + + def it_rejects_out_of_range_ints(self): + prop = CT_Properties.new_properties().add_property("X", 0) + with pytest.raises(ValueError): + prop.i4.value_typed = 2147483648 + + def it_rejects_bool_assignment_at_the_leaf(self): + # the dispatch in CT_Property.value picks vt:bool for bool, but if a + # caller reaches into the leaf they should still get the type guard + prop = 
CT_Properties.new_properties().add_property("X", 0) + with pytest.raises(TypeError): + prop.i4.value_typed = True + + +class DescribeCT_VtR8: + @pytest.mark.parametrize("value", [-1.0, 0.0, 0.5, 3.14159, 1e20, -1e-20]) + def it_round_trips_float_text(self, value): + prop = CT_Properties.new_properties().add_property("X", value) + assert isinstance(prop.r8, CT_VtR8) + assert prop.value == pytest.approx(value) + + +class DescribeCT_VtBool: + @pytest.mark.parametrize( + ("xml_text", "expected"), + [("true", True), ("false", False), ("1", True), ("0", False), (" TRUE ", True)], + ) + def it_reads_office_and_xsd_boolean_lexical_forms(self, xml_text, expected): + prop_xml = ( + "" + "%s" + % (nsdecls("op", "vt"), DEFAULT_FMTID, xml_text) + ) + prop = parse_xml(prop_xml.encode()) + assert prop.value is expected + + @pytest.mark.parametrize(("py_value", "expected_text"), [(True, "true"), (False, "false")]) + def it_writes_office_lexical_form(self, py_value, expected_text): + prop = CT_Properties.new_properties().add_property("X", py_value) + assert prop.bool_.text == expected_text + + def it_raises_on_invalid_text(self): + prop_xml = ( + "" + "maybe" + % (nsdecls("op", "vt"), DEFAULT_FMTID) + ) + prop = parse_xml(prop_xml.encode()) + with pytest.raises(ValueError): + _ = prop.value + + +class DescribeCT_VtFiletime: + def it_round_trips_a_naive_utc_datetime(self): + original = dt.datetime(2026, 5, 5, 14, 0, 0) + prop = CT_Properties.new_properties().add_property("X", original) + assert isinstance(prop.filetime, CT_VtFiletime) + assert prop.filetime.text == "2026-05-05T14:00:00Z" + assert prop.value == original + + def it_normalizes_a_tz_aware_datetime_to_utc(self): + eastern = dt.timezone(dt.timedelta(hours=-5)) + aware = dt.datetime(2026, 5, 5, 9, 0, 0, tzinfo=eastern) # 14:00 UTC + prop = CT_Properties.new_properties().add_property("X", aware) + assert prop.filetime.text == "2026-05-05T14:00:00Z" + + def it_parses_offset_form_too(self): + prop_xml = ( + "" + 
"2026-05-05T09:00:00-05:00" + % (nsdecls("op", "vt"), DEFAULT_FMTID) + ) + prop = parse_xml(prop_xml.encode()) + assert prop.value == dt.datetime(2026, 5, 5, 14, 0, 0) + + +class DescribeCT_Property_value_setter: + def it_replaces_an_existing_value_child(self): + prop = CT_Properties.new_properties().add_property("X", "old") + prop.value = 99 + assert prop.lpwstr is None + assert prop.value == 99 + + def it_returns_None_for_value_when_no_child_present(self): + # build a stripped property element by parsing + prop_xml = ( + '' + % (nsdecls("op", "vt"), DEFAULT_FMTID) + ) + prop = parse_xml(prop_xml.encode()) + assert isinstance(prop, CT_Property) + assert prop.value is None diff --git a/tests/oxml/test_custom_xml.py b/tests/oxml/test_custom_xml.py new file mode 100644 index 000000000..2932a2130 --- /dev/null +++ b/tests/oxml/test_custom_xml.py @@ -0,0 +1,128 @@ +# pyright: reportPrivateUsage=false + +"""Unit-test suite for `pptx.oxml.custom_xml`.""" + +from __future__ import annotations + +from lxml import etree + +from pptx.oxml import parse_xml +from pptx.oxml.custom_xml import ( + CT_DatastoreItem, + CT_DatastoreSchemaRef, + CT_DatastoreSchemaRefs, +) +from pptx.oxml.ns import nsdecls + +_GUID_A = "{1A2B3C4D-5E6F-7890-ABCD-EF1234567890}" +_GUID_B = "{ABCDEF12-3456-7890-ABCD-EF1234567890}" + + +def _datastore_xml(item_id: str, *uris: str) -> bytes: + schema_refs = "" + if uris: + schema_refs = "%s" % "".join( + '' % u for u in uris + ) + return ( + '%s' + % (nsdecls("ds"), item_id, schema_refs) + ).encode() + + +class DescribeCT_DatastoreItem: + def it_parses_to_the_registered_class(self): + root = parse_xml(_datastore_xml(_GUID_A)) + assert isinstance(root, CT_DatastoreItem) + + def it_exposes_the_itemID_attribute(self): + root = parse_xml(_datastore_xml(_GUID_A)) + assert root.itemID == _GUID_A + + def it_can_change_the_itemID_attribute(self): + root = parse_xml(_datastore_xml(_GUID_A)) + root.itemID = _GUID_B + assert root.itemID == _GUID_B + + def 
# pyright: reportPrivateUsage=false

"""Unit-test suite for `pptx.oxml.custom_xml`."""

from __future__ import annotations

from lxml import etree

from pptx.oxml import parse_xml
from pptx.oxml.custom_xml import (
    CT_DatastoreItem,
    CT_DatastoreSchemaRef,
    CT_DatastoreSchemaRefs,
)
from pptx.oxml.ns import nsdecls

_GUID_A = "{1A2B3C4D-5E6F-7890-ABCD-EF1234567890}"
_GUID_B = "{ABCDEF12-3456-7890-ABCD-EF1234567890}"


def _datastore_xml(item_id: str, *uris: str) -> bytes:
    """Return a `ds:datastoreItem` document for `item_id`, as bytes.

    One `ds:schemaRef` is emitted per entry in `uris`, wrapped in a
    `ds:schemaRefs` envelope; the envelope is omitted when `uris` is empty.
    """
    # NOTE(review): the three XML literals below had their `<...>` tags
    # stripped in the copy under review. They are rebuilt from the `ds` prefix
    # passed to `nsdecls()`, the three-argument format tuple, and the
    # `itemID`/`schemaRefs`/`uri` names used by the tests -- confirm against
    # the upstream file.
    schema_refs = ""
    if uris:
        schema_refs = "<ds:schemaRefs>%s</ds:schemaRefs>" % "".join(
            '<ds:schemaRef ds:uri="%s"/>' % u for u in uris
        )
    return (
        '<ds:datastoreItem %s ds:itemID="%s">%s</ds:datastoreItem>'
        % (nsdecls("ds"), item_id, schema_refs)
    ).encode()


class DescribeCT_DatastoreItem:
    def it_parses_to_the_registered_class(self):
        root = parse_xml(_datastore_xml(_GUID_A))
        assert isinstance(root, CT_DatastoreItem)

    def it_exposes_the_itemID_attribute(self):
        root = parse_xml(_datastore_xml(_GUID_A))
        assert root.itemID == _GUID_A

    def it_can_change_the_itemID_attribute(self):
        root = parse_xml(_datastore_xml(_GUID_A))
        root.itemID = _GUID_B
        assert root.itemID == _GUID_B

    def it_returns_an_empty_tuple_when_no_schemaRefs_present(self):
        root = parse_xml(_datastore_xml(_GUID_A))
        assert root.schemaRefs is None
        assert root.schema_ref_uris == ()

    def it_lists_schema_ref_uris_in_document_order(self):
        root = parse_xml(_datastore_xml(_GUID_A, "urn:a", "urn:b", "urn:c"))
        assert root.schema_ref_uris == ("urn:a", "urn:b", "urn:c")

    def it_creates_a_fresh_root_via_new(self):
        elm = CT_DatastoreItem.new(_GUID_A)
        assert elm.itemID == _GUID_A
        assert elm.schema_ref_uris == ()

    def it_creates_a_fresh_root_with_initial_schema_refs(self):
        elm = CT_DatastoreItem.new(_GUID_B, schema_refs=["urn:foo", "urn:bar"])
        assert elm.schema_ref_uris == ("urn:foo", "urn:bar")

    def it_adds_a_schema_ref_creating_the_envelope_when_absent(self):
        elm = CT_DatastoreItem.new(_GUID_A)
        elm.add_schema_ref("urn:foo")
        assert isinstance(elm.schemaRefs, CT_DatastoreSchemaRefs)
        assert elm.schema_ref_uris == ("urn:foo",)

    def it_returns_existing_ref_on_duplicate_add(self):
        elm = CT_DatastoreItem.new(_GUID_A, schema_refs=["urn:foo"])
        first = elm.add_schema_ref("urn:foo")
        second = elm.add_schema_ref("urn:foo")
        assert first is second
        assert elm.schema_ref_uris == ("urn:foo",)

    def it_removes_a_schema_ref_by_uri(self):
        elm = CT_DatastoreItem.new(_GUID_A, schema_refs=["urn:a", "urn:b"])
        assert elm.remove_schema_ref("urn:a") is True
        assert elm.schema_ref_uris == ("urn:b",)

    def it_returns_False_when_removing_nonexistent_ref(self):
        elm = CT_DatastoreItem.new(_GUID_A, schema_refs=["urn:a"])
        assert elm.remove_schema_ref("urn:missing") is False

    def it_drops_the_envelope_when_the_last_ref_is_removed(self):
        elm = CT_DatastoreItem.new(_GUID_A, schema_refs=["urn:only"])
        assert elm.remove_schema_ref("urn:only") is True
        assert elm.schemaRefs is None
        assert elm.schema_ref_uris == ()

    def it_round_trips_through_parse_serialize(self):
        elm = CT_DatastoreItem.new(_GUID_A, schema_refs=["urn:x", "urn:y"])
        serialized = etree.tostring(elm)
        reparsed = parse_xml(serialized)
        assert isinstance(reparsed, CT_DatastoreItem)
        assert reparsed.itemID == _GUID_A
        assert reparsed.schema_ref_uris == ("urn:x", "urn:y")


class DescribeCT_DatastoreSchemaRef:
    def it_parses_to_the_registered_class(self):
        root = parse_xml(_datastore_xml(_GUID_A, "urn:foo"))
        ref = root.schemaRefs.schemaRef_lst[0]
        assert isinstance(ref, CT_DatastoreSchemaRef)

    def it_exposes_the_uri_attribute(self):
        root = parse_xml(_datastore_xml(_GUID_A, "urn:foo"))
        assert root.schemaRefs.schemaRef_lst[0].uri == "urn:foo"

    def it_can_change_the_uri_attribute(self):
        root = parse_xml(_datastore_xml(_GUID_A, "urn:foo"))
        ref = root.schemaRefs.schemaRef_lst[0]
        ref.uri = "urn:replaced"
        assert root.schema_ref_uris == ("urn:replaced",)


class DescribeCT_DatastoreSchemaRefs:
    def it_finds_a_ref_by_uri(self):
        root = parse_xml(_datastore_xml(_GUID_A, "urn:a", "urn:b"))
        found = root.schemaRefs.find_by_uri("urn:b")
        assert isinstance(found, CT_DatastoreSchemaRef)
        assert found.uri == "urn:b"

    def it_returns_None_for_unknown_uri(self):
        root = parse_xml(_datastore_xml(_GUID_A, "urn:a"))
        assert root.schemaRefs.find_by_uri("urn:missing") is None
expected_cxml diff --git a/tests/parts/test_custom_properties.py b/tests/parts/test_custom_properties.py new file mode 100644 index 000000000..62d7c76c5 --- /dev/null +++ b/tests/parts/test_custom_properties.py @@ -0,0 +1,108 @@ +# pyright: reportPrivateUsage=false + +"""Unit-test suite for `pptx.parts.custom_properties`.""" + +from __future__ import annotations + +import datetime as dt + +import pytest + +from pptx.opc.constants import CONTENT_TYPE as CT +from pptx.oxml.custom_properties import DEFAULT_FMTID, CT_Properties +from pptx.oxml.ns import nsdecls +from pptx.parts.custom_properties import CustomPropertiesPart + + +def _props_xml(*property_xml_chunks: str) -> bytes: + body = "".join(property_xml_chunks) + return ("%s" % (nsdecls("op", "vt"), body)).encode() + + +def _property_xml(name: str, pid: int, vt_inner_xml: str) -> str: + return ( + '%s' + % (DEFAULT_FMTID, pid, name, vt_inner_xml) + ) + + +class DescribeCustomPropertiesPart: + def it_can_construct_a_default_part(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + assert isinstance(part, CustomPropertiesPart) + assert part.content_type == CT.OFC_CUSTOM_PROPERTIES + assert part.partname == "/docProps/custom.xml" + assert isinstance(part._element, CT_Properties) + assert part.property_names == () + + def it_loads_an_existing_part_from_blob(self): + xml = _props_xml( + _property_xml("Source", 2, "cli"), + _property_xml("Build", 3, "42"), + ) + part = CustomPropertiesPart.load( + "/docProps/custom.xml", CT.OFC_CUSTOM_PROPERTIES, None, xml # type: ignore[arg-type] + ) + assert isinstance(part._element, CT_Properties) + assert part.property_names == ("Source", "Build") + + def it_adds_a_property_via_delegation(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + prop = part.add_property("Source", "cli@1.4") + assert prop.name == "Source" + assert prop.value == "cli@1.4" + assert part.property_names == ("Source",) + + def 
it_dispatches_value_types_through_to_the_element(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + part.add_property("Build", 42) + part.add_property("Score", 3.14) + part.add_property("IsDraft", True) + part.add_property("At", dt.datetime(2026, 5, 5, 14, 0, 0)) + assert part.get_property("Build").value == 42 + assert part.get_property("Score").value == pytest.approx(3.14) + assert part.get_property("IsDraft").value is True + assert part.get_property("At").value == dt.datetime(2026, 5, 5, 14, 0, 0) + + def it_returns_None_when_property_missing(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + assert part.get_property("Missing") is None + + def it_removes_a_property_idempotently(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + part.add_property("X", "a") + assert part.remove_property("X") is True + assert part.property_names == () + assert part.remove_property("X") is False + + def it_supports_in_iter_and_len(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + part.add_property("a", "1") + part.add_property("b", "2") + part.add_property("c", "3") + assert len(part) == 3 + assert list(part) == ["a", "b", "c"] + assert "b" in part + assert "z" not in part + # __contains__ on non-string is False + assert (42 in part) is False # type: ignore[operator] + + def it_round_trips_blob_through_add_and_reparse(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + part.add_property("Source", "cli") + part.add_property("Build", 99) + blob = part.blob + # blob is XML that re-parses to an equivalent CustomPropertiesPart + reloaded = CustomPropertiesPart.load( + "/docProps/custom.xml", CT.OFC_CUSTOM_PROPERTIES, None, blob # type: ignore[arg-type] + ) + assert reloaded.property_names == ("Source", "Build") + assert reloaded.get_property("Source").value == "cli" + assert reloaded.get_property("Build").value == 99 + + def 
it_assigns_unique_pids_across_adds(self): + part = CustomPropertiesPart.default(None) # type: ignore[arg-type] + a = part.add_property("a", "1") + b = part.add_property("b", "2") + c = part.add_property("c", "3") + assert (a.pid, b.pid, c.pid) == (2, 3, 4) diff --git a/tests/parts/test_custom_xml.py b/tests/parts/test_custom_xml.py new file mode 100644 index 000000000..6d9b58f95 --- /dev/null +++ b/tests/parts/test_custom_xml.py @@ -0,0 +1,286 @@ +# pyright: reportPrivateUsage=false + +"""Unit-test suite for `pptx.parts.custom_xml`.""" + +from __future__ import annotations + +import re + +import pytest +from lxml import etree + +from pptx.opc.constants import CONTENT_TYPE as CT +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from pptx.opc.packuri import PackURI +from pptx.oxml.custom_xml import CT_DatastoreItem +from pptx.oxml.ns import nsdecls +from pptx.parts.custom_xml import ( + CustomXmlPart, + CustomXmlPropertiesPart, + _next_customxml_index, + _parse_payload, +) + +_GUID_A = "{1A2B3C4D-5E6F-7890-ABCD-EF1234567890}" +_GUID_B = "{ABCDEF12-3456-7890-ABCD-EF1234567890}" +_GUID_RE = re.compile( + r"^\{[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}\}$" +) + + +class _StubPart: + """Minimal stand-in for an existing package part during partname allocation tests.""" + + def __init__(self, partname: str): + self.partname = PackURI(partname) + + +class _StubPackage: + """Minimal Package-like double exposing only `iter_parts()`. + + Sufficient because `CustomXmlPart.new_pair` and `_next_customxml_index` + consult `iter_parts()` for partname allocation and never call any other + method on the package during construction. 
+ """ + + def __init__(self, partnames: list[str] | None = None): + self._parts = [_StubPart(p) for p in (partnames or [])] + + def iter_parts(self): + return iter(self._parts) + + +# --------------------------------------------------------------------------- +# CustomXmlPropertiesPart +# --------------------------------------------------------------------------- + + +def _datastore_xml(item_id: str, *uris: str) -> bytes: + schema_refs = "" + if uris: + schema_refs = "%s" % "".join( + '' % u for u in uris + ) + return ( + '%s' + % (nsdecls("ds"), item_id, schema_refs) + ).encode() + + +class DescribeCustomXmlPropertiesPart: + def it_constructs_via_new(self): + part = CustomXmlPropertiesPart.new( + None, # type: ignore[arg-type] + PackURI("/customXml/itemProps1.xml"), + _GUID_A, + schema_refs=("urn:foo", "urn:bar"), + ) + assert isinstance(part, CustomXmlPropertiesPart) + assert part.content_type == CT.OFC_CUSTOM_XML_PROPERTIES + assert part.partname == "/customXml/itemProps1.xml" + assert isinstance(part._element, CT_DatastoreItem) + assert part.datastore_item_id == _GUID_A + assert part.schema_refs == ("urn:foo", "urn:bar") + + def it_loads_from_blob(self): + part = CustomXmlPropertiesPart.load( + "/customXml/itemProps1.xml", + CT.OFC_CUSTOM_XML_PROPERTIES, + None, # type: ignore[arg-type] + _datastore_xml(_GUID_A, "urn:x"), + ) + assert part.datastore_item_id == _GUID_A + assert part.schema_refs == ("urn:x",) + + def it_can_change_the_datastore_item_id(self): + part = CustomXmlPropertiesPart.new( + None, PackURI("/customXml/itemProps1.xml"), _GUID_A # type: ignore[arg-type] + ) + part.datastore_item_id = _GUID_B + assert part.datastore_item_id == _GUID_B + + def it_adds_and_removes_schema_refs(self): + part = CustomXmlPropertiesPart.new( + None, PackURI("/customXml/itemProps1.xml"), _GUID_A # type: ignore[arg-type] + ) + part.add_schema_ref("urn:a") + part.add_schema_ref("urn:b") + assert part.schema_refs == ("urn:a", "urn:b") + assert 
part.remove_schema_ref("urn:a") is True + assert part.schema_refs == ("urn:b",) + assert part.remove_schema_ref("urn:missing") is False + + +# --------------------------------------------------------------------------- +# CustomXmlPart +# --------------------------------------------------------------------------- + + +class DescribeCustomXmlPart_new_pair: + def it_creates_paired_data_and_props_parts(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair( + pkg, # type: ignore[arg-type] + b'', + ) + assert isinstance(data, CustomXmlPart) + assert data.content_type == CT.XML + assert data.partname == "/customXml/item1.xml" + assert isinstance(data.props_part, CustomXmlPropertiesPart) + assert data.props_part.partname == "/customXml/itemProps1.xml" + + def it_wires_the_props_relationship(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair(pkg, b"") # type: ignore[arg-type] + rels = list(data.rels.values()) + assert len(rels) == 1 + assert rels[0].reltype == RT.CUSTOM_XML_PROPS + assert rels[0].target_part is data.props_part + + def it_auto_generates_a_curly_braced_guid_when_omitted(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair(pkg, b"") # type: ignore[arg-type] + assert _GUID_RE.match(data.datastore_item_id), data.datastore_item_id + + def it_accepts_a_caller_supplied_datastore_item_id(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair( + pkg, b"", datastore_item_id=_GUID_A # type: ignore[arg-type] + ) + assert data.datastore_item_id == _GUID_A + + def it_propagates_schema_refs_to_props_part(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair( + pkg, # type: ignore[arg-type] + b"", + schema_refs=("urn:a", "urn:b"), + ) + assert data.schema_refs == ("urn:a", "urn:b") + assert data.props_part.schema_refs == ("urn:a", "urn:b") + + @pytest.mark.parametrize( + "payload", + [ + b'', + '', + etree.fromstring(b""), + ], + ) + def it_accepts_payload_as_bytes_str_or_element(self, payload): + pkg = _StubPackage() + data = 
CustomXmlPart.new_pair(pkg, payload) # type: ignore[arg-type] + assert b"") # type: ignore[arg-type] + assert data.partname == "/customXml/item3.xml" + assert data.props_part.partname == "/customXml/itemProps3.xml" + + def it_reuses_a_gap_in_the_index_sequence(self): + pkg = _StubPackage( + partnames=[ + "/customXml/item1.xml", + "/customXml/itemProps1.xml", + "/customXml/item3.xml", + "/customXml/itemProps3.xml", + ] + ) + data = CustomXmlPart.new_pair(pkg, b"") # type: ignore[arg-type] + assert data.partname == "/customXml/item2.xml" + + +class DescribeCustomXmlPart_payload: + def it_exposes_the_live_root_element(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair( # type: ignore[arg-type] + pkg, b'cli' + ) + assert data.element.tag == "{urn:my:p}provenance" + + def it_round_trips_payload_through_blob(self): + pkg = _StubPackage() + original = b'hello' + data = CustomXmlPart.new_pair(pkg, original) # type: ignore[arg-type] + # blob is the same XML re-serialized + reparsed = etree.fromstring(data.blob) + assert reparsed.tag == "{urn:my}root" + assert reparsed.find("{urn:my}child").text == "hello" + + def it_replaces_the_payload_via_replace_xml(self): + pkg = _StubPackage() + data = CustomXmlPart.new_pair(pkg, b"") # type: ignore[arg-type] + original_id = data.datastore_item_id + data.replace_xml(b'') + assert b"") + assert elm.tag == "x" + + def it_parses_payload_str(self): + elm = _parse_payload("") + assert elm.tag == "x" + + def it_returns_passed_element_unchanged(self): + x = etree.fromstring(b"") + assert _parse_payload(x) is x + + def it_raises_TypeError_for_unsupported_payload(self): + with pytest.raises(TypeError): + _parse_payload(123) # type: ignore[arg-type] diff --git a/tests/parts/test_image.py b/tests/parts/test_image.py index 386e3fce9..35c186497 100644 --- a/tests/parts/test_image.py +++ b/tests/parts/test_image.py @@ -59,13 +59,13 @@ def it_provides_access_to_its_image(self, request, image_): assert image is image_ 
@pytest.mark.parametrize( - "width, height, expected_width, expected_height", - ( + ("width", "height", "expected_width", "expected_height"), + [ (None, None, Emu(2590800), Emu(2590800)), (1000, None, 1000, 1000), (None, 3000, 3000, 3000), (3337, 9999, 3337, 9999), - ), + ], ) def it_can_scale_its_dimensions(self, width, height, expected_width, expected_height): with open(test_image_path, "rb") as f: diff --git a/tests/parts/test_presentation.py b/tests/parts/test_presentation.py index edde4c44c..76ab8265c 100644 --- a/tests/parts/test_presentation.py +++ b/tests/parts/test_presentation.py @@ -168,7 +168,7 @@ def it_raises_on_slide_id_not_found(self, slide_part_, related_part_): with pytest.raises(ValueError): prs_part.slide_id(slide_part_) - @pytest.mark.parametrize("is_present", (True, False)) + @pytest.mark.parametrize("is_present", [True, False]) def it_finds_a_slide_by_slide_id(self, is_present, slide_, slide_part_, related_part_): prs_elm = element( "p:presentation/p:sldIdLst/(p:sldId{r:id=a,id=256},p:sldId{r:id=" diff --git a/tests/parts/test_slide.py b/tests/parts/test_slide.py index 9eb2f11b0..5497a841e 100644 --- a/tests/parts/test_slide.py +++ b/tests/parts/test_slide.py @@ -318,13 +318,13 @@ def it_can_add_a_chart_part(self, request, package_, relate_to_): assert rId == "rId42" @pytest.mark.parametrize( - "prog_id, rel_type", - ( + ("prog_id", "rel_type"), + [ (PROG_ID.DOCX, RT.PACKAGE), (PROG_ID.PPTX, RT.PACKAGE), (PROG_ID.XLSX, RT.PACKAGE), ("Foo.Bar.18", RT.OLE_OBJECT), - ), + ], ) def it_can_add_an_embedded_ole_object_part( self, request, package_, relate_to_, prog_id, rel_type @@ -390,7 +390,7 @@ def it_provides_access_to_the_slide_layout(self, layout_fixture): def it_knows_the_minimal_element_xml_for_a_slide(self): path = absjoin(test_file_dir, "minimal_slide.xml") sld = CT_Slide.new() - with open(path, "r") as f: + with open(path) as f: expected_xml = f.read() assert sld.xml == expected_xml diff --git a/tests/shapes/test_autoshape.py 
b/tests/shapes/test_autoshape.py index efb38e6b9..fe2142c61 100644 --- a/tests/shapes/test_autoshape.py +++ b/tests/shapes/test_autoshape.py @@ -102,7 +102,7 @@ def it_should_load_default_adjustment_values( def it_should_load_adj_val_actuals_from_xml(self, load_adj_actuals_fixture_): prstGeom, expected_actuals, prstGeom_xml = load_adj_actuals_fixture_ adjustments = AdjustmentCollection(prstGeom)._adjustments - actual_actuals = dict([(a.name, a.actual) for a in adjustments]) + actual_actuals = {a.name: a.actual for a in adjustments} assert actual_actuals == expected_actuals def it_provides_normalized_effective_value_on_indexed_access(self, indexed_access_fixture_): diff --git a/tests/shapes/test_graphfrm.py b/tests/shapes/test_graphfrm.py index 3324fcfe0..16188fc16 100644 --- a/tests/shapes/test_graphfrm.py +++ b/tests/shapes/test_graphfrm.py @@ -54,24 +54,24 @@ def it_provides_access_to_its_chart_part(self, request, chart_part_): assert chart_part is chart_part_ @pytest.mark.parametrize( - "graphicData_uri, expected_value", - ( + ("graphicData_uri", "expected_value"), + [ (GRAPHIC_DATA_URI_CHART, True), (GRAPHIC_DATA_URI_OLEOBJ, False), (GRAPHIC_DATA_URI_TABLE, False), - ), + ], ) def it_knows_whether_it_contains_a_chart(self, graphicData_uri, expected_value): graphicFrame = element("p:graphicFrame/a:graphic/a:graphicData{uri=%s}" % graphicData_uri) assert GraphicFrame(graphicFrame, None).has_chart is expected_value @pytest.mark.parametrize( - "graphicData_uri, expected_value", - ( + ("graphicData_uri", "expected_value"), + [ (GRAPHIC_DATA_URI_CHART, False), (GRAPHIC_DATA_URI_OLEOBJ, False), (GRAPHIC_DATA_URI_TABLE, True), - ), + ], ) def it_knows_whether_it_contains_a_table(self, graphicData_uri, expected_value): graphicFrame = element("p:graphicFrame/a:graphic/a:graphicData{uri=%s}" % graphicData_uri) @@ -112,14 +112,14 @@ def it_raises_on_shadow(self): graphic_frame.shadow @pytest.mark.parametrize( - "uri, oleObj_child, expected_value", - ( + ("uri", 
"oleObj_child", "expected_value"), + [ (GRAPHIC_DATA_URI_CHART, None, MSO_SHAPE_TYPE.CHART), (GRAPHIC_DATA_URI_OLEOBJ, "embed", MSO_SHAPE_TYPE.EMBEDDED_OLE_OBJECT), (GRAPHIC_DATA_URI_OLEOBJ, "link", MSO_SHAPE_TYPE.LINKED_OLE_OBJECT), (GRAPHIC_DATA_URI_TABLE, None, MSO_SHAPE_TYPE.TABLE), ("foobar", None, None), - ), + ], ) def it_knows_its_shape_type(self, uri, oleObj_child, expected_value): graphicFrame = element( diff --git a/tests/shapes/test_placeholder.py b/tests/shapes/test_placeholder.py index 4d9b26ea0..94db0fbd7 100644 --- a/tests/shapes/test_placeholder.py +++ b/tests/shapes/test_placeholder.py @@ -51,7 +51,7 @@ def it_provides_override_dimensions_when_present(self, override_fixture): placeholder, prop_name, expected_value = override_fixture assert getattr(placeholder, prop_name) == expected_value - @pytest.mark.parametrize("prop_name", ("left", "top", "width", "height")) + @pytest.mark.parametrize("prop_name", ["left", "top", "width", "height"]) def it_provides_inherited_dims_when_no_override(self, request, prop_name): method_mock(request, _BaseSlidePlaceholder, "_inherited_value", return_value=42) placeholder = _BaseSlidePlaceholder(element("p:sp/p:spPr"), None) @@ -463,8 +463,8 @@ def it_can_insert_a_picture_into_itself(self, request): assert placeholder_picture is placeholder_picture_ @pytest.mark.parametrize( - "image_size, crop_attr_names", - (((444, 333), ("l", "r")), ((333, 444), ("t", "b"))), + ("image_size", "crop_attr_names"), + [((444, 333), ("l", "r")), ((333, 444), ("t", "b"))], ) def it_creates_a_pic_element_to_help(self, request, image_size, crop_attr_names): _get_or_add_image_ = method_mock( diff --git a/tests/shapes/test_shapetree.py b/tests/shapes/test_shapetree.py index 3cf1ab225..80b75723d 100644 --- a/tests/shapes/test_shapetree.py +++ b/tests/shapes/test_shapetree.py @@ -118,12 +118,12 @@ def it_knows_how_many_shapes_it_contains(self, len_fixture): def it_can_iterate_over_the_shapes_it_contains(self, iter_fixture): shapes, 
expected_shapes, BaseShapeFactory_, calls = iter_fixture - assert [s for s in shapes] == expected_shapes + assert list(shapes) == expected_shapes assert BaseShapeFactory_.call_args_list == calls def it_iterates_shape_elements_to_help__iter__(self, iter_elms_fixture): shapes, expected_elms = iter_elms_fixture - assert [e for e in shapes._iter_member_elms()] == expected_elms + assert list(shapes._iter_member_elms()) == expected_elms def it_supports_indexed_access(self, getitem_fixture): shapes, idx, BaseShapeFactory_, sp, shape_ = getitem_fixture @@ -987,7 +987,7 @@ def it_can_iterate_over_its_placeholders(self, iter_fixture): placeholders, SlideShapeFactory_ = iter_fixture[:2] expected_calls, expected_values = iter_fixture[2:] - ps = [p for p in placeholders] + ps = list(placeholders) assert SlideShapeFactory_.call_args_list == expected_calls assert ps == expected_values @@ -2169,14 +2169,14 @@ def it_creates_the_graphicFrame_element(self, request): ) @pytest.mark.parametrize( - "cx_arg, prog_id, expected_value", - ( + ("cx_arg", "prog_id", "expected_value"), + [ (Emu(999999), None, Emu(999999)), (None, PROG_ID.DOCX, Emu(965200)), (None, PROG_ID.PPTX, Emu(965200)), (None, PROG_ID.XLSX, Emu(965200)), (None, "Foo.Bar.6", Emu(965200)), - ), + ], ) def it_determines_the_shape_width_to_help(self, cx_arg, prog_id, expected_value): element_creator = _OleObjectElementCreator( @@ -2185,14 +2185,14 @@ def it_determines_the_shape_width_to_help(self, cx_arg, prog_id, expected_value) assert element_creator._cx == expected_value @pytest.mark.parametrize( - "cy_arg, prog_id, expected_value", - ( + ("cy_arg", "prog_id", "expected_value"), + [ (Emu(666666), None, Emu(666666)), (None, PROG_ID.DOCX, Emu(609600)), (None, PROG_ID.PPTX, Emu(609600)), (None, PROG_ID.XLSX, Emu(609600)), (None, "Foo.Bar.6", Emu(609600)), - ), + ], ) def it_determines_the_shape_height_to_help(self, cy_arg, prog_id, expected_value): element_creator = _OleObjectElementCreator( @@ -2201,11 +2201,11 @@ def 
it_determines_the_shape_height_to_help(self, cy_arg, prog_id, expected_value assert element_creator._cy == expected_value @pytest.mark.parametrize( - "icon_height_arg, expected_value", - ( + ("icon_height_arg", "expected_value"), + [ (Emu(666666), Emu(666666)), (None, Emu(609600)), - ), + ], ) def it_determines_the_icon_height_to_help(self, icon_height_arg, expected_value): element_creator = _OleObjectElementCreator( @@ -2214,14 +2214,14 @@ def it_determines_the_icon_height_to_help(self, icon_height_arg, expected_value) assert element_creator._icon_height == expected_value @pytest.mark.parametrize( - "icon_file_arg, prog_id, expected_value", - ( + ("icon_file_arg", "prog_id", "expected_value"), + [ ("user-icon.png", PROG_ID.XLSX, "user-icon.png"), (None, "Foo.Bar.18", "generic-icon.emf"), (None, PROG_ID.DOCX, "docx-icon.emf"), (None, PROG_ID.PPTX, "pptx-icon.emf"), (None, PROG_ID.XLSX, "xlsx-icon.emf"), - ), + ], ) def it_resolves_the_icon_image_file_to_help(self, icon_file_arg, prog_id, expected_value): element_creator = _OleObjectElementCreator( @@ -2250,8 +2250,8 @@ def it_adds_and_relates_the_icon_image_part_to_help( assert rId == "rId16" @pytest.mark.parametrize( - "icon_width_arg, expected_value", - ((Emu(666666), Emu(666666)), (None, Emu(965200))), + ("icon_width_arg", "expected_value"), + [(Emu(666666), Emu(666666)), (None, Emu(965200))], ) def it_determines_the_icon_width_to_help(self, icon_width_arg, expected_value): element_creator = _OleObjectElementCreator( @@ -2287,13 +2287,13 @@ def it_adds_and_relates_the_ole_object_part_to_help( assert rId == "rId14" @pytest.mark.parametrize( - "prog_id_arg, expected_value", - ( + ("prog_id_arg", "expected_value"), + [ (PROG_ID.DOCX, "Word.Document.12"), (PROG_ID.PPTX, "PowerPoint.Show.12"), (PROG_ID.XLSX, "Excel.Sheet.12"), ("Something.Else.42", "Something.Else.42"), - ), + ], ) def it_resolves_the_progId_str_to_help(self, prog_id_arg, expected_value): element_creator = _OleObjectElementCreator( diff --git 
a/tests/test_custom_properties.py b/tests/test_custom_properties.py new file mode 100644 index 000000000..64238d1ef --- /dev/null +++ b/tests/test_custom_properties.py @@ -0,0 +1,233 @@ +# pyright: reportPrivateUsage=false + +"""End-to-end test suite for `pptx.custom_properties.CustomProperties`.""" + +from __future__ import annotations + +import datetime as dt +from io import BytesIO + +import pytest + +from pptx import Presentation +from pptx.custom_properties import CustomProperties +from pptx.parts.custom_properties import CustomPropertiesPart + + +@pytest.fixture +def empty_prs(): + """Return a fresh Presentation built from the default template.""" + return Presentation() + + +def _roundtrip(prs): + """Save `prs` to BytesIO and return a freshly-reloaded Presentation.""" + buf = BytesIO() + prs.save(buf) + buf.seek(0) + return Presentation(buf) + + +class DescribeCustomProperties_Mapping: + def it_starts_empty_for_a_default_presentation(self, empty_prs): + cp = empty_prs.custom_properties + assert isinstance(cp, CustomProperties) + assert len(cp) == 0 + assert list(cp) == [] + assert "anything" not in cp + + def it_writes_and_reads_a_string_value(self, empty_prs): + empty_prs.custom_properties["Source"] = "cli@1.4" + assert empty_prs.custom_properties["Source"] == "cli@1.4" + assert "Source" in empty_prs.custom_properties + + def it_dispatches_value_types_on_assignment(self, empty_prs): + empty_prs.custom_properties["S"] = "string" + empty_prs.custom_properties["I"] = 42 + empty_prs.custom_properties["F"] = 3.14 + empty_prs.custom_properties["B"] = True + empty_prs.custom_properties["D"] = dt.datetime(2026, 5, 5, 14, 0, 0) + + assert empty_prs.custom_properties["S"] == "string" + assert empty_prs.custom_properties["I"] == 42 + assert empty_prs.custom_properties["F"] == pytest.approx(3.14) + assert empty_prs.custom_properties["B"] is True + assert empty_prs.custom_properties["D"] == dt.datetime(2026, 5, 5, 14, 0, 0) + + def 
it_replaces_an_existing_value_on_repeated_assignment(self, empty_prs): + empty_prs.custom_properties["X"] = "old" + empty_prs.custom_properties["X"] = "new" + assert empty_prs.custom_properties["X"] == "new" + assert len(empty_prs.custom_properties) == 1 + + def it_replaces_value_with_a_different_type(self, empty_prs): + empty_prs.custom_properties["X"] = "hello" + empty_prs.custom_properties["X"] = 42 + assert empty_prs.custom_properties["X"] == 42 + + def it_raises_KeyError_on_missing_lookup(self, empty_prs): + with pytest.raises(KeyError): + empty_prs.custom_properties["missing"] + + def it_deletes_a_property(self, empty_prs): + empty_prs.custom_properties["X"] = "a" + del empty_prs.custom_properties["X"] + assert "X" not in empty_prs.custom_properties + + def it_raises_KeyError_on_delete_missing(self, empty_prs): + with pytest.raises(KeyError): + del empty_prs.custom_properties["missing"] + + def it_supports_iter_keys_values_items_get(self, empty_prs): + empty_prs.custom_properties["a"] = "1" + empty_prs.custom_properties["b"] = 2 + empty_prs.custom_properties["c"] = True + + assert list(empty_prs.custom_properties) == ["a", "b", "c"] + assert list(empty_prs.custom_properties.keys()) == ["a", "b", "c"] + assert dict(empty_prs.custom_properties.items()) == {"a": "1", "b": 2, "c": True} + assert empty_prs.custom_properties.get("missing") is None + assert empty_prs.custom_properties.get("missing", "default") == "default" + + def it_raises_TypeError_on_unsupported_value(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_properties["X"] = object() # type: ignore[assignment] + + +class DescribeCustomProperties_edge_cases: + def it_returns_False_for_non_string_contains(self, empty_prs): + empty_prs.custom_properties["X"] = "v" + assert (42 in empty_prs.custom_properties) is False # type: ignore[operator] + + def it_treats_a_property_with_no_value_child_as_absent(self, empty_prs): + # Force a malformed entry: an op:property element with no vt:* 
child. + # The lookup returns None → CustomProperties surfaces it as KeyError. + from pptx.oxml import parse_xml + from pptx.oxml.custom_properties import DEFAULT_FMTID + from pptx.oxml.ns import nsdecls + + cp_part = empty_prs.part.package.custom_properties_part + # Replace _element with a malformed Properties root containing one + # property that has no value child. + broken = parse_xml( + ( + "" + '' + "" % (nsdecls("op", "vt"), DEFAULT_FMTID) + ).encode() + ) + cp_part._element = broken + with pytest.raises(KeyError): + _ = empty_prs.custom_properties["empty"] + + +class DescribeCustomProperties_explicit_setters: + def it_writes_string_with_set_string(self, empty_prs): + # set_string("X", "42") writes vt:lpwstr, not vt:i4 + empty_prs.custom_properties.set_string("X", "42") + assert empty_prs.custom_properties["X"] == "42" + # confirm the underlying element is vt:lpwstr + prop = empty_prs.part.package.custom_properties_part.get_property("X") + assert prop is not None + assert prop.lpwstr is not None + assert prop.i4 is None + + def it_writes_int_with_set_int_rejecting_bool(self, empty_prs): + empty_prs.custom_properties.set_int("X", 5) + assert empty_prs.custom_properties["X"] == 5 + with pytest.raises(TypeError): + empty_prs.custom_properties.set_int("X", True) # type: ignore[arg-type] + + def it_writes_float_with_set_float(self, empty_prs): + empty_prs.custom_properties.set_float("X", 3.14) + prop = empty_prs.part.package.custom_properties_part.get_property("X") + assert prop is not None + assert prop.r8 is not None + + def it_writes_bool_with_set_bool(self, empty_prs): + empty_prs.custom_properties.set_bool("X", False) + assert empty_prs.custom_properties["X"] is False + with pytest.raises(TypeError): + empty_prs.custom_properties.set_bool("X", 0) # type: ignore[arg-type] + + def it_writes_datetime_with_set_datetime(self, empty_prs): + empty_prs.custom_properties.set_datetime( + "When", dt.datetime(2026, 1, 1, 0, 0, 0) + ) + assert 
empty_prs.custom_properties["When"] == dt.datetime(2026, 1, 1, 0, 0, 0) + + def it_rejects_set_string_with_non_string(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_properties.set_string("X", 42) # type: ignore[arg-type] + + def it_rejects_set_float_with_bool_or_non_number(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_properties.set_float("X", True) # type: ignore[arg-type] + with pytest.raises(TypeError): + empty_prs.custom_properties.set_float("X", "1.0") # type: ignore[arg-type] + + def it_rejects_set_datetime_with_non_datetime(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_properties.set_datetime("X", "today") # type: ignore[arg-type] + + def it_overwrites_an_existing_value_via_set_string(self, empty_prs): + empty_prs.custom_properties["X"] = 42 + empty_prs.custom_properties.set_string("X", "now-a-string") + assert empty_prs.custom_properties["X"] == "now-a-string" + + +class DescribeCustomProperties_lazy_creation: + def it_creates_the_part_on_first_access(self, empty_prs): + # default presentation has no custom_properties_part yet — the lazy + # access path must create one. + cp_part = empty_prs.part.package.custom_properties_part + assert isinstance(cp_part, CustomPropertiesPart) + # Mapping wrapper finds it + assert isinstance(empty_prs.custom_properties, CustomProperties) + + def it_returns_the_same_wrapper_class_each_call(self, empty_prs): + # Different instances are fine (CustomProperties is a thin facade) — + # what matters is that they wrap the same underlying part. 
+ a = empty_prs.custom_properties + b = empty_prs.custom_properties + assert a._part is b._part + + +class DescribeCustomProperties_roundtrip: + def it_round_trips_through_save_load(self, empty_prs): + empty_prs.custom_properties["Source"] = "cli@1.4.2" + empty_prs.custom_properties["BuildNumber"] = 42 + empty_prs.custom_properties["IsDraft"] = True + empty_prs.custom_properties["At"] = dt.datetime(2026, 5, 5, 14, 0, 0) + + reloaded = _roundtrip(empty_prs) + + assert reloaded.custom_properties["Source"] == "cli@1.4.2" + assert reloaded.custom_properties["BuildNumber"] == 42 + assert reloaded.custom_properties["IsDraft"] is True + assert reloaded.custom_properties["At"] == dt.datetime(2026, 5, 5, 14, 0, 0) + + def it_preserves_core_properties_alongside_custom_ones(self, empty_prs): + # both can coexist; custom_properties is /docProps/custom.xml, + # core_properties is /docProps/core.xml — distinct parts + empty_prs.core_properties.author = "Athena" + empty_prs.custom_properties["Source"] = "cli" + reloaded = _roundtrip(empty_prs) + assert reloaded.core_properties.author == "Athena" + assert reloaded.custom_properties["Source"] == "cli" + + def it_is_a_noop_when_never_touched(self, empty_prs): + # if the API is not used, no /docProps/custom.xml is added (the part + # is created lazily ON first call to .custom_properties_part). A bare + # save() that never touches the API should leave the package alone. 
+ buf = BytesIO() + empty_prs.save(buf) + # Reopen and confirm no custom_properties_part rel exists yet + buf.seek(0) + reloaded = Presentation(buf) + # accessing custom_properties for the first time HERE creates it, + # but pre-access there should be no rel of CUSTOM_PROPERTIES type + from pptx.opc.constants import RELATIONSHIP_TYPE as RT + + rel_types = {r.reltype for r in reloaded.part.package._rels.values()} + assert RT.CUSTOM_PROPERTIES not in rel_types diff --git a/tests/test_custom_xml.py b/tests/test_custom_xml.py new file mode 100644 index 000000000..6bf4c1d63 --- /dev/null +++ b/tests/test_custom_xml.py @@ -0,0 +1,333 @@ +# pyright: reportPrivateUsage=false + +"""End-to-end test suite for `pptx.custom_xml.CustomXmlParts`.""" + +from __future__ import annotations + +from io import BytesIO + +import pytest + +from pptx import Presentation +from pptx.custom_xml import ( + NAME_PROPERTY_PREFIX, + CustomXmlParts, + _normalize_guid, + _upgrade_to_custom_xml_part, +) +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from pptx.parts.custom_xml import CustomXmlPart + + +@pytest.fixture +def empty_prs(): + return Presentation() + + +def _roundtrip(prs): + buf = BytesIO() + prs.save(buf) + buf.seek(0) + return Presentation(buf) + + +class DescribeCustomXmlParts_basic: + def it_starts_empty_for_a_default_presentation(self, empty_prs): + cxp = empty_prs.custom_xml_parts + assert isinstance(cxp, CustomXmlParts) + assert len(cxp) == 0 + assert list(cxp) == [] + + def it_adds_a_part_with_default_presentation_scope(self, empty_prs): + part = empty_prs.custom_xml_parts.add(b'') + assert isinstance(part, CustomXmlPart) + assert part.partname == "/customXml/item1.xml" + # presentation scope: rel from the presentation part + rel_types_at_prs = {r.reltype for r in empty_prs.part.rels.values()} + rel_types_at_pkg = {r.reltype for r in empty_prs.part.package._rels.values()} + assert RT.CUSTOM_XML in rel_types_at_prs + assert RT.CUSTOM_XML not in rel_types_at_pkg + + def 
it_adds_a_part_with_package_scope_when_requested(self, empty_prs): + empty_prs.custom_xml_parts.add(b"", scope="package") + rel_types_at_prs = {r.reltype for r in empty_prs.part.rels.values()} + rel_types_at_pkg = {r.reltype for r in empty_prs.part.package._rels.values()} + assert RT.CUSTOM_XML in rel_types_at_pkg + assert RT.CUSTOM_XML not in rel_types_at_prs + + def it_rejects_unknown_scope(self, empty_prs): + with pytest.raises(ValueError): + empty_prs.custom_xml_parts.add(b"", scope="bogus") # type: ignore[arg-type] + + def it_walks_both_scopes_in_iteration(self, empty_prs): + empty_prs.custom_xml_parts.add(b'', scope="presentation") + empty_prs.custom_xml_parts.add(b'', scope="package") + partnames = [str(p.partname) for p in empty_prs.custom_xml_parts] + assert len(partnames) == 2 + + def it_assigns_distinct_partnames_to_consecutive_pairs(self, empty_prs): + a = empty_prs.custom_xml_parts.add(b"") + b = empty_prs.custom_xml_parts.add(b"") + assert a.partname != b.partname + assert str(a.partname) == "/customXml/item1.xml" + assert str(b.partname) == "/customXml/item2.xml" + + +class DescribeCustomXmlParts_lookups: + def it_indexes_by_position(self, empty_prs): + a = empty_prs.custom_xml_parts.add(b'') + b = empty_prs.custom_xml_parts.add(b'') + assert empty_prs.custom_xml_parts[0] is a + assert empty_prs.custom_xml_parts[1] is b + + def it_raises_IndexError_on_out_of_range(self, empty_prs): + empty_prs.custom_xml_parts.add(b"") + with pytest.raises(IndexError): + empty_prs.custom_xml_parts[5] + + def it_indexes_by_partname_tail(self, empty_prs): + empty_prs.custom_xml_parts.add(b"") + empty_prs.custom_xml_parts.add(b"") + found = empty_prs.custom_xml_parts["item2.xml"] + assert str(found.partname) == "/customXml/item2.xml" + + def it_raises_KeyError_on_unknown_partname(self, empty_prs): + empty_prs.custom_xml_parts.add(b"") + with pytest.raises(KeyError): + empty_prs.custom_xml_parts["item99.xml"] + + def it_raises_TypeError_on_other_key_types(self, 
empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_xml_parts[1.5] # type: ignore[index] + + def it_finds_by_guid_brace_tolerant(self, empty_prs): + guid = "{ABCDEF12-3456-7890-ABCD-EF1234567890}" + empty_prs.custom_xml_parts.add(b"", datastoreItem_id=guid) + # exact form + assert empty_prs.custom_xml_parts.by_guid(guid) is not None + # without braces, lowercase + assert ( + empty_prs.custom_xml_parts.by_guid("abcdef12-3456-7890-abcd-ef1234567890") + is not None + ) + + def it_returns_None_for_unknown_guid(self, empty_prs): + empty_prs.custom_xml_parts.add(b"") + assert empty_prs.custom_xml_parts.by_guid("{00000000-0000-0000-0000-000000000000}") is None + + def it_finds_by_user_assigned_name(self, empty_prs): + added = empty_prs.custom_xml_parts.add( + b'', + name="provenance", + ) + assert empty_prs.custom_xml_parts.by_name("provenance") is added + + def it_returns_None_for_unknown_name(self, empty_prs): + empty_prs.custom_xml_parts.add(b"", name="real") + assert empty_prs.custom_xml_parts.by_name("missing") is None + + def it_raises_TypeError_on_non_str_name(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_xml_parts.by_name(42) # type: ignore[arg-type] + + +class DescribeCustomXmlParts_remove: + def it_removes_by_part_instance(self, empty_prs): + a = empty_prs.custom_xml_parts.add(b"", name="a") + empty_prs.custom_xml_parts.add(b"", name="b") + empty_prs.custom_xml_parts.remove(a) + assert len(empty_prs.custom_xml_parts) == 1 + # name entry also removed + assert ( + empty_prs.part.package.custom_properties_part.get_property( + NAME_PROPERTY_PREFIX + a.datastore_item_id + ) + is None + ) + + def it_removes_by_index(self, empty_prs): + empty_prs.custom_xml_parts.add(b"") + empty_prs.custom_xml_parts.add(b"") + empty_prs.custom_xml_parts.remove(0) + assert len(empty_prs.custom_xml_parts) == 1 + + def it_removes_by_partname_tail(self, empty_prs): + empty_prs.custom_xml_parts.add(b"") + empty_prs.custom_xml_parts.add(b"") + 
empty_prs.custom_xml_parts.remove("item1.xml") + assert str(empty_prs.custom_xml_parts[0].partname) == "/customXml/item2.xml" + + def it_is_idempotent(self, empty_prs): + a = empty_prs.custom_xml_parts.add(b"") + empty_prs.custom_xml_parts.remove(a) + empty_prs.custom_xml_parts.remove(a) # no error + assert len(empty_prs.custom_xml_parts) == 0 + + def it_removes_a_package_scoped_part(self, empty_prs): + a = empty_prs.custom_xml_parts.add(b"", scope="package") + empty_prs.custom_xml_parts.remove(a) + assert len(empty_prs.custom_xml_parts) == 0 + rel_types = {r.reltype for r in empty_prs.part.package._rels.values()} + assert RT.CUSTOM_XML not in rel_types + + def it_raises_TypeError_on_unsupported_remove_arg(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_xml_parts.remove(1.5) # type: ignore[arg-type] + + +class DescribeCustomXmlParts_roundtrip: + def it_round_trips_added_parts(self, empty_prs): + empty_prs.custom_xml_parts.add( + b'cli', + name="provenance", + schema_refs=["urn:my:p"], + ) + empty_prs.custom_xml_parts.add(b"", name="extra", scope="package") + + reloaded = _roundtrip(empty_prs) + + assert len(reloaded.custom_xml_parts) == 2 + prov = reloaded.custom_xml_parts.by_name("provenance") + assert prov is not None + assert prov.element.tag == "{urn:my:p}provenance" + assert prov.schema_refs == ("urn:my:p",) + extra = reloaded.custom_xml_parts.by_name("extra") + assert extra is not None + assert extra.element.tag == "extra" + + def it_preserves_payload_text_byte_for_byte_through_lxml_roundtrip(self, empty_prs): + original = b'hello' + added = empty_prs.custom_xml_parts.add(original) + guid = added.datastore_item_id + + reloaded = _roundtrip(empty_prs) + part = reloaded.custom_xml_parts.by_guid(guid) + assert part is not None + # parsed structure is preserved + child = part.element.find("{u:r}child") + assert child is not None + assert child.get("a") == "1" + assert child.text == "hello" + + def it_replaces_xml_payload_in_place(self, 
empty_prs): + added = empty_prs.custom_xml_parts.add(b"") + guid = added.datastore_item_id + added.replace_xml(b'') + + reloaded = _roundtrip(empty_prs) + part = reloaded.custom_xml_parts.by_guid(guid) + assert part is not None + assert part.element.tag == "{u:n}new" + + def it_supports_add_item_convenience(self, empty_prs): + added = empty_prs.custom_xml_parts.add(b'') + added.add_item("item", "first") + added.add_item("item", "second", priority="high") + + # children are present + children = list(added.element) + assert len(children) == 2 + assert children[0].text == "first" + assert children[1].get("priority") == "high" + + +class DescribeCustomXmlParts_string_blob: + def it_adds_a_string_blob(self, empty_prs): + part = empty_prs.custom_xml_parts.add_string_blob( + "readme", "# Hello\nworld", mime_hint="text/markdown" + ) + assert isinstance(part, CustomXmlPart) + assert part.element.tag == "{urn:python-pptx:blob}blob" + assert part.element.get("name") == "readme" + assert part.element.get("mime") == "text/markdown" + assert part.element.get("encoding") == "text" + assert part.element.text == "# Hello\nworld" + + def it_reads_back_a_string_blob_by_name(self, empty_prs): + empty_prs.custom_xml_parts.add_string_blob("note", "secret message") + assert empty_prs.custom_xml_parts.read_string_blob("note") == "secret message" + + def it_returns_None_for_missing_blob(self, empty_prs): + assert empty_prs.custom_xml_parts.read_string_blob("missing") is None + + def it_returns_None_for_a_non_blob_part(self, empty_prs): + empty_prs.custom_xml_parts.add(b'', name="other") + # name lookup finds the part, but it's not the blob envelope shape + assert empty_prs.custom_xml_parts.read_string_blob("other") is None + assert empty_prs.custom_xml_parts.blob_encoding("other") is None + + def it_round_trips_a_string_blob(self, empty_prs): + empty_prs.custom_xml_parts.add_string_blob("md", "content") + reloaded = _roundtrip(empty_prs) + assert 
reloaded.custom_xml_parts.read_string_blob("md") == "content" + assert reloaded.custom_xml_parts.blob_encoding("md") == "text" + + def it_supports_base64_encoding(self, empty_prs): + encoded = "aGVsbG8gd29ybGQ=" # b64 of "hello world" + empty_prs.custom_xml_parts.add_string_blob("bin", encoded, encoding="base64") + assert empty_prs.custom_xml_parts.read_string_blob("bin") == encoded + assert empty_prs.custom_xml_parts.blob_encoding("bin") == "base64" + + def it_rejects_empty_name(self, empty_prs): + with pytest.raises(ValueError): + empty_prs.custom_xml_parts.add_string_blob("", "content") + + def it_rejects_non_string_content(self, empty_prs): + with pytest.raises(TypeError): + empty_prs.custom_xml_parts.add_string_blob("x", 42) # type: ignore[arg-type] + + def it_rejects_unknown_encoding(self, empty_prs): + with pytest.raises(ValueError): + empty_prs.custom_xml_parts.add_string_blob( + "x", "content", encoding="utf-7" # type: ignore[arg-type] + ) + + def it_supports_package_scope(self, empty_prs): + from pptx.opc.constants import RELATIONSHIP_TYPE as RT_ + + empty_prs.custom_xml_parts.add_string_blob( + "x", "content", scope="package" + ) + rel_types = {r.reltype for r in empty_prs.part.package._rels.values()} + assert RT_.CUSTOM_XML in rel_types + + +class DescribeCustomXmlPart_name_edge_cases: + def it_returns_None_when_no_name_property_for_the_guid(self, empty_prs): + # Add a part WITHOUT a name. .name should return None even though the + # custom_properties part does exist (other entries may have been written). + empty_prs.custom_properties["AnythingElse"] = "value" + added = empty_prs.custom_xml_parts.add(b"") + assert added.name is None + + +class DescribeUpgradeAndHelpers: + def it_upgrades_a_loaded_base_part_to_CustomXmlPart_on_iteration(self, empty_prs): + empty_prs.custom_xml_parts.add(b'') + reloaded = _roundtrip(empty_prs) + # Force iteration; the base Part loaded for the customXml/item1.xml + # part gets upgraded to CustomXmlPart in place. 
+ first = next(iter(reloaded.custom_xml_parts)) + assert isinstance(first, CustomXmlPart) + assert first.element.tag == "{u:x}x" + + def it_passes_through_an_already_upgraded_part_unchanged(self, empty_prs): + added = empty_prs.custom_xml_parts.add(b"") + # the just-added part is already a CustomXmlPart + same = _upgrade_to_custom_xml_part(added) + assert same is added + + @pytest.mark.parametrize( + ("input_guid", "expected"), + [ + ("{ABCDEF12-3456-7890-ABCD-EF1234567890}", "abcdef12-3456-7890-abcd-ef1234567890"), + ("abcdef12-3456-7890-abcd-ef1234567890", "abcdef12-3456-7890-abcd-ef1234567890"), + (" {AbCdEf12-3456-7890-ABCD-EF1234567890} ", "abcdef12-3456-7890-abcd-ef1234567890"), + ], + ) + def it_normalizes_guids_for_comparison(self, input_guid, expected): + assert _normalize_guid(input_guid) == expected + + def it_raises_TypeError_on_non_str_guid_normalize(self): + with pytest.raises(TypeError): + _normalize_guid(42) # type: ignore[arg-type] diff --git a/tests/test_files/customxml/README.rst b/tests/test_files/customxml/README.rst new file mode 100644 index 000000000..52bce6981 --- /dev/null +++ b/tests/test_files/customxml/README.rst @@ -0,0 +1,30 @@ +customXml integration test fixtures +==================================== + +These ``.pptx`` files are **synthetic** — generated by ``python-pptx-extended`` +itself rather than captured from third-party tools. 
They cover the topologies +the integration tests need without licensing complications: + +================================ ============================================================== +File What it exercises +================================ ============================================================== +``presentation-scoped.pptx`` Single customXml part rooted at ``ppt/_rels/presentation.xml.rels`` + (the default scope our ``add(...)`` writes; matches Office.js) +``package-scoped.pptx`` Single customXml part rooted at ``_rels/.rels`` (the VSTO / + SharePoint topology; ``scope="package"`` override) +``multipart.pptx`` Two customXml parts at mixed scopes, custom document + properties, and a string-blob envelope. Exercises every + Phase-1 through Phase-4 surface in one file. +``clean.pptx`` A bare presentation with no customXml at all. Regression + baseline — saving and reloading must produce no + ``RT.CUSTOM_XML`` or ``RT.CUSTOM_PROPERTIES`` rels. +================================ ============================================================== + +The generation script lives next to this file at +``tests/test_files/customxml/_generate_fixtures.py``. Re-run it whenever the +fixture shape needs to change. + +For real third-party validation (SharePoint-saved, Office.js-produced, +VSTO-tooled output) the maintainer will capture additional fixtures during +manual PowerPoint UI testing — see ``Plans/customxml-implementation-plan.md`` +§5.4. Those land here later under names like ``sharepoint-saved.pptx`` etc. diff --git a/tests/test_files/customxml/_generate_fixtures.py b/tests/test_files/customxml/_generate_fixtures.py new file mode 100644 index 000000000..86bd30b83 --- /dev/null +++ b/tests/test_files/customxml/_generate_fixtures.py @@ -0,0 +1,84 @@ +"""Re-generate the synthetic .pptx fixtures used by the customXml integration tests. 
+ +Run from the repo root:: + + python3 tests/test_files/customxml/_generate_fixtures.py + +Outputs are deterministic except for auto-assigned `datastoreItem` GUIDs; +explicit GUIDs below pin them so the resulting files round-trip byte-for-byte +when re-generated. +""" + +from __future__ import annotations + +import os + +from pptx import Presentation + +_HERE = os.path.dirname(os.path.abspath(__file__)) + + +def _path(name: str) -> str: + return os.path.join(_HERE, name) + + +def write_presentation_scoped() -> None: + prs = Presentation() + prs.custom_xml_parts.add( + b'' + b"integration-fixture" + b"2026-05-05T17:00:00Z" + b"", + name="provenance", + schema_refs=["urn:my:provenance"], + datastoreItem_id="{1A2B3C4D-5E6F-7890-ABCD-EF1234567890}", + ) + prs.save(_path("presentation-scoped.pptx")) + + +def write_package_scoped() -> None: + prs = Presentation() + prs.custom_xml_parts.add( + b'' + b"" + b"", + name="vsto", + scope="package", + datastoreItem_id="{ABCDEF12-3456-7890-ABCD-EF1234567890}", + ) + prs.save(_path("package-scoped.pptx")) + + +def write_multipart() -> None: + prs = Presentation() + prs.custom_properties["Source"] = "deck-builder-cli@1.4.2" + prs.custom_properties["BuildNumber"] = 42 + prs.custom_properties["IsDraft"] = True + prs.custom_xml_parts.add( + b'cli', + name="provenance", + schema_refs=["urn:my:p"], + ) + prs.custom_xml_parts.add(b"", name="extra", scope="package") + prs.custom_xml_parts.add_string_blob( + "readme", + "# Hello\n\nThis is markdown content embedded in the .pptx.", + mime_hint="text/markdown", + ) + prs.save(_path("multipart.pptx")) + + +def write_clean() -> None: + Presentation().save(_path("clean.pptx")) + + +def main() -> None: + write_presentation_scoped() + write_package_scoped() + write_multipart() + write_clean() + print("regenerated fixtures in", _HERE) + + +if __name__ == "__main__": + main() diff --git a/tests/test_files/customxml/clean.pptx b/tests/test_files/customxml/clean.pptx new file mode 100644 index 
000000000..0845db094 Binary files /dev/null and b/tests/test_files/customxml/clean.pptx differ diff --git a/tests/test_files/customxml/multipart.pptx b/tests/test_files/customxml/multipart.pptx new file mode 100644 index 000000000..f01e98641 Binary files /dev/null and b/tests/test_files/customxml/multipart.pptx differ diff --git a/tests/test_files/customxml/package-scoped.pptx b/tests/test_files/customxml/package-scoped.pptx new file mode 100644 index 000000000..7c1a65f15 Binary files /dev/null and b/tests/test_files/customxml/package-scoped.pptx differ diff --git a/tests/test_files/customxml/presentation-scoped.pptx b/tests/test_files/customxml/presentation-scoped.pptx new file mode 100644 index 000000000..04a25a767 Binary files /dev/null and b/tests/test_files/customxml/presentation-scoped.pptx differ diff --git a/tests/test_slide.py b/tests/test_slide.py index 3339c3796..7a94b5ec2 100644 --- a/tests/test_slide.py +++ b/tests/test_slide.py @@ -499,7 +499,7 @@ def it_raises_on_slide_not_in_collection(self, raises_fixture): def it_can_iterate_its_slides(self, iter_fixture): slides, related_slide_, calls, expected_value = iter_fixture - slide_lst = [s for s in slides] + slide_lst = list(slides) assert related_slide_.call_args_list == calls assert slide_lst == expected_value @@ -791,7 +791,7 @@ def it_can_iterate_its_slide_layouts(self, part_prop_, slide_master_part_): related_slide_layout_.side_effect = _slide_layouts slide_layouts = SlideLayouts(sldLayoutIdLst, None) - slide_layout_lst = [sl for sl in slide_layouts] + slide_layout_lst = list(slide_layouts) assert related_slide_layout_.call_args_list == [call("a"), call("b")] assert slide_layout_lst == _slide_layouts @@ -974,7 +974,7 @@ def it_knows_how_many_masters_it_contains(self, len_fixture): def it_can_iterate_the_slide_masters(self, iter_fixture): slide_masters, related_slide_master_, calls, expected_values = iter_fixture - _slide_masters = [sm for sm in slide_masters] + _slide_masters = list(slide_masters) 
assert related_slide_master_.call_args_list == calls assert _slide_masters == expected_values @@ -1045,15 +1045,15 @@ class Describe_Background(object): """Unit-test suite for `pptx.slide._Background` objects.""" @pytest.mark.parametrize( - "cSld_xml, expected_cxml", - ( + ("cSld_xml", "expected_cxml"), + [ ("p:cSld{a:b=c}", "p:cSld{a:b=c}/p:bg/p:bgPr/(a:noFill,a:effectLst)"), ( "p:cSld{a:b=c}/p:bg/p:bgRef", "p:cSld{a:b=c}/p:bg/p:bgPr/(a:noFill,a:effectLst)", ), ("p:cSld/p:bg/p:bgPr/a:solidFill", "p:cSld/p:bg/p:bgPr/a:solidFill"), - ), + ], ) def it_provides_access_to_its_fill(self, request, cSld_xml, expected_cxml): fill_ = instance_mock(request, FillFormat) diff --git a/tests/text/test_layout.py b/tests/text/test_layout.py index 6e2c83d6a..cb9fdc10a 100644 --- a/tests/text/test_layout.py +++ b/tests/text/test_layout.py @@ -51,12 +51,12 @@ def it_finds_best_fit_font_size_to_help_best_fit(self, _best_fit_fixture): assert font_size is font_size_ @pytest.mark.parametrize( - "extents, point_size, text_lines, expected_value", - ( + ("extents", "point_size", "text_lines", "expected_value"), + [ ((66, 99), 6, ("foo", "bar"), False), ((66, 100), 6, ("foo", "bar"), True), ((66, 101), 6, ("foo", "bar"), True), - ), + ], ) def it_provides_a_fits_inside_predicate_fn( self, diff --git a/tests/text/test_text.py b/tests/text/test_text.py index 73343b2b6..48bbc5bc1 100644 --- a/tests/text/test_text.py +++ b/tests/text/test_text.py @@ -70,12 +70,12 @@ def it_can_change_its_autosize_setting( @pytest.mark.parametrize( "txBody_cxml", - ( + [ "p:txBody/(a:p,a:p,a:p)", 'p:txBody/a:p/a:r/a:t"foo"', 'p:txBody/a:p/(a:br,a:r/a:t"foo")', 'p:txBody/a:p/(a:fld,a:br,a:r/a:t"foo")', - ), + ], ) def it_can_clear_itself_of_content(self, txBody_cxml): text_frame = TextFrame(element(txBody_cxml), None) @@ -1210,12 +1210,12 @@ def it_can_get_the_text_of_the_run(self, text_get_fixture): assert isinstance(text, str) @pytest.mark.parametrize( - "r_cxml, new_value, expected_r_cxml", - ( + ("r_cxml", 
"new_value", "expected_r_cxml"), + [ ("a:r/a:t", "barfoo", 'a:r/a:t"barfoo"'), ("a:r/a:t", "bar\x1bfoo", 'a:r/a:t"bar_x001B_foo"'), ("a:r/a:t", "bar\tfoo", 'a:r/a:t"bar\tfoo"'), - ), + ], ) def it_can_change_its_text(self, r_cxml, new_value, expected_r_cxml): run = _Run(element(r_cxml), None)