diff --git a/crates/paperjam-docx/src/lib.rs b/crates/paperjam-docx/src/lib.rs
index fd0a358..b0fcb1f 100644
--- a/crates/paperjam-docx/src/lib.rs
+++ b/crates/paperjam-docx/src/lib.rs
@@ -74,12 +74,11 @@ impl DocumentTrait for DocxDocument {
     }
 
     fn save_to_bytes(&self) -> Result<Vec<u8>, Self::Error> {
-        let mut buf = std::io::Cursor::new(Vec::new());
-        self.inner
-            .clone()
-            .build()
-            .pack(&mut buf)
-            .map_err(|e| DocxError::Io(std::io::Error::other(e)))?;
-        Ok(buf.into_inner())
+        // Return the original input bytes. This crate exposes no mutation API,
+        // so a rebuild via `inner.build().pack()` would be lossy and — for any
+        // DOCX containing complex fields (TOC, PAGE, PAGEREF, HYPERLINK, …) —
+        // panics in docx-rs 0.4 (`RunChild::InstrTextString` is reader-only;
+        // its writer arm is `unreachable!()`). See bokuweb/docx-rs#750.
+        Ok(self.raw_bytes.clone())
     }
 }
diff --git a/py_src/paperjam/_any_document.py b/py_src/paperjam/_any_document.py
index f9af17b..ff700a8 100644
--- a/py_src/paperjam/_any_document.py
+++ b/py_src/paperjam/_any_document.py
@@ -133,9 +133,16 @@ def convert_to(self, format: str) -> bytes:
         return bytes(self._ensure_open().convert_to(format))
 
     def save(self, path: str | os.PathLike[str]) -> None:
-        """Save the document to a file."""
-        data = self.save_bytes()
-        with builtins_open(str(path), "wb") as f:
+        """Save the document to a file.
+
+        The target format is inferred from the file extension. If it differs
+        from the source format, the document is converted via :meth:`convert_to`;
+        otherwise the original bytes are written unchanged.
+        """
+        path_str = str(path)
+        target_ext = os.path.splitext(path_str)[1].lstrip(".").lower()
+        data = self.convert_to(target_ext) if target_ext and target_ext != self.format else self.save_bytes()
+        with builtins_open(path_str, "wb") as f:
             f.write(data)
 
     def save_bytes(self) -> bytes: