Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ Change Log
Other:

* Retrospectively mark `4756 <https://github.com/pymupdf/PyMuPDF/issues/4756>`_ as fixed in 1.26.6.
* Improved safety of `pymupdf embed-extract`. By default this now refuses to
  write to an existing file or outside the current directory, unless `-output`
  or the new flag `-unsafe` is specified.


**Changes in version 1.26.6** (2025-11-05)
Expand Down
3 changes: 2 additions & 1 deletion docs/module.rst
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ Extraction
Extract an embedded file like this::

pymupdf embed-extract -h
usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-output OUTPUT]
usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-unsafe] [-output OUTPUT]
input

---------------------- extract embedded file to disk ----------------------
Expand All @@ -311,6 +311,7 @@ Extract an embedded file like this::
-h, --help show this help message and exit
-name NAME name of entry
-password PASSWORD password
-unsafe allow write to stored name even if an existing file or outside current directory
-output OUTPUT output filename, default is stored name

For details consult :meth:`Document.embfile_get`. Example (refer to previous section)::
Expand Down
6 changes: 6 additions & 0 deletions pipcl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2435,6 +2435,12 @@ def log(text):
log(f'{sys.version_info=}')
log(f'{list(sys.version_info)=}')

log(f'{sysconfig.get_config_var("Py_GIL_DISABLED")=}')
try:
log(f'{sys._is_gil_enabled()=}')
except AttributeError:
log(f'sys._is_gil_enabled() => AttributeError')

log(f'CPU bits: {cpu_bits()}')

log(f'sys.argv ({len(sys.argv)}):')
Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,12 @@ def build_mupdf_unix(
if PYMUPDF_SETUP_SWIG:
command += f' --swig {shlex.quote(PYMUPDF_SETUP_SWIG)}'
command += f' -d build/{build_prefix}{build_type} -b'
#command += f' --m-target libs'
if sys.implementation.name == 'graalpy':
# Force rerun of swig.
pipcl.run(f'ls -l {mupdf_local}/platform/python/')
for p in glob.glob(f'{mupdf_local}/platform/python/mupdfcpp*.i.cpp'):
pipcl.log(f'Graal, deleting: {p!r}')
pipcl.fs_remove(p)
if PYMUPDF_SETUP_MUPDF_REFCHECK_IF:
command += f' --refcheck-if "{PYMUPDF_SETUP_MUPDF_REFCHECK_IF}"'
if PYMUPDF_SETUP_MUPDF_TRACE_IF:
Expand Down
9 changes: 9 additions & 0 deletions src/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ def embedded_get(args):
except (ValueError, pymupdf.mupdf.FzErrorBase) as e:
sys.exit(f'no such embedded file {args.name!r}: {e}')
filename = args.output if args.output else d["filename"]
if not args.unsafe and not args.output:
if os.path.exists(filename):
sys.exit(f'refusing to overwrite existing file with stored name: {filename}')
filename_abs = os.path.abspath(filename)
if not filename_abs.startswith(os.getcwd() + os.sep):
sys.exit(f'refusing to write stored name outside current directory: {filename}')
with open(filename, "wb") as output:
output.write(stream)
pymupdf.message("saved entry '%s' as '%s'" % (args.name, filename))
Expand Down Expand Up @@ -1024,6 +1030,9 @@ def main():
ps_embed_extract.add_argument("input", type=str, help="PDF filename")
ps_embed_extract.add_argument("-name", required=True, help="name of entry")
ps_embed_extract.add_argument("-password", help="password")
ps_embed_extract.add_argument("-unsafe", default=False, action="store_true",
help="allow write to stored name even if an existing file or outside current directory"
)
ps_embed_extract.add_argument(
"-output", help="output filename, default is stored name"
)
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def install_required_packages():
if platform.system() == 'Windows' and int.bit_length(sys.maxsize+1) == 32:
# No pillow wheel available, and doesn't build easily.
pass
elif platform.python_implementation() == 'GraalVM':
pass
else:
packages += ' pillow'
if platform.system().startswith('MSYS_NT-'):
Expand Down
86 changes: 86 additions & 0 deletions tests/test_4767.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import shutil
import os
import pymupdf
import subprocess
import sys


def test_4767():
    '''
    Check handling of unsafe paths in `pymupdf embed-extract`.

    Creates a PDF with two embedded files, one whose stored name escapes the
    current directory (`../../test.txt`) and one with a plain stored name
    (`test2.txt`), then runs `pymupdf embed-extract` from a nested directory
    and checks that:

    * a stored name outside the current directory is refused by default;
    * `-unsafe` allows that write;
    * a plain stored name is extracted by default;
    * a second extraction is refused because the file now exists;
    * `-unsafe` allows the overwrite.
    '''
    with pymupdf.open() as document:
        document.new_page()
        document.embfile_add(
            'evil_entry',
            b'poc:traversal test\n',
            filename="../../test.txt",
            ufilename="../../test.txt",
            desc="poc",
        )
        document.embfile_add(
            'evil_entry2',
            b'poc:traversal test\n',
            filename="test2.txt",
            ufilename="test2.txt",
            desc="poc",
        )
        path = os.path.abspath(f'{__file__}/../../tests/test_4767.pdf')
        document.save(path)

    # Start from an empty directory tree so that get_paths() only ever sees
    # files created by this test. Forward slashes keep the expected paths
    # identical on Windows.
    testdir = os.path.abspath(f'{__file__}/../../tests/test_4767_dir').replace('\\', '/')
    shutil.rmtree(testdir, ignore_errors=True)
    workdir = f'{testdir}/one/two'
    os.makedirs(workdir, exist_ok=True)

    def run(*args):
        '''
        Runs `pymupdf embed-extract` on the test PDF with extra arguments
        `args`, using `workdir` as the current directory. Returns the
        `subprocess.CompletedProcess`, with stderr merged into `.stdout`.
        '''
        # Use an argument list plus `cwd=` rather than a `cd ... && ...` shell
        # string, so that spaces or shell metacharacters in the interpreter
        # path or the checkout path cannot break the command.
        command = [sys.executable, '-m', 'pymupdf', 'embed-extract', path, *args]
        print(f'Running in {workdir}: {command}')
        cp = subprocess.run(
            command,
            cwd=workdir,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        print(cp.stdout)
        return cp

    def get_paths():
        '''
        Returns paths of all files currently below `testdir`, using forward
        slashes. os.walk() is top-down by default, so files directly in
        `testdir` are listed before those in subdirectories.
        '''
        return [
            f'{dirpath}/{filename}'.replace('\\', '/')
            for dirpath, _dirnames, filenames in os.walk(testdir)
            for filename in filenames
        ]

    # Stored name points outside the current directory: refused by default.
    cp = run('-name', 'evil_entry')
    assert cp.returncode
    assert cp.stdout == 'refusing to write stored name outside current directory: ../../test.txt\n'
    assert not get_paths()

    # Same entry with -unsafe: written two levels up, i.e. directly in testdir.
    cp = run('-name', 'evil_entry', '-unsafe')
    assert cp.returncode == 0
    assert cp.stdout == "saved entry 'evil_entry' as '../../test.txt'\n"
    paths = get_paths()
    print(f'{paths=}')
    assert paths == [f'{testdir}/test.txt']

    # Plain stored name that does not exist yet: extracted by default.
    cp = run('-name', 'evil_entry2')
    assert not cp.returncode
    assert cp.stdout == "saved entry 'evil_entry2' as 'test2.txt'\n"
    paths = get_paths()
    print(f'{paths=}')
    assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']

    # Same entry again: the file now exists, so overwriting is refused.
    cp = run('-name', 'evil_entry2')
    assert cp.returncode
    assert cp.stdout == "refusing to overwrite existing file with stored name: test2.txt\n"
    paths = get_paths()
    print(f'{paths=}')
    assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']

    # -unsafe permits overwriting the existing file.
    cp = run('-name', 'evil_entry2', '-unsafe')
    assert not cp.returncode
    assert cp.stdout == "saved entry 'evil_entry2' as 'test2.txt'\n"
    paths = get_paths()
    print(f'{paths=}')
    assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']
Loading