Skip to content

Commit 77b5934

Browse files
authored
Defense-in-depth zip-entry validation in zip_utils (#4509)
## Summary - Adds `_validate_zip_entries()` in `nvflare/fuel/utils/zip_utils.py` that rejects zip entries with absolute paths or `..` components before `extractall()` is called. - Wires the helper into both `unzip_all_from_bytes` and `unzip_all_from_file`. ## Context NVBugs **6128822** flagged `zip_utils.py:178` / `:195` as exploitable path traversal (CVSS 8.8). The claim does not reproduce on any supported Python version: CPython's `zipfile.extractall` already strips `..` components and absolute-path roots from member names before extraction (see CVE-2014-4616 hardening and the [zipfile docs](https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile.extract)). Empirical check on 3.12: a malicious archive containing `../../../../site-packages/nvflare/malicious.py` extracts to `<out>/site-packages/nvflare/malicious.py` — never escapes the output dir. So this PR is **not a security fix**; it's defense-in-depth so the operator gets a clear `ValueError` on a suspicious archive instead of silently rewritten paths, and to satisfy the external reporter. ## Test plan - [x] Manual repro: `..` traversal, POSIX absolute (`/etc/...`), Windows-backslash absolute (`\windows\...`), and mid-path `..` (`foo/../../bar`) all rejected with clear `ValueError`s. - [x] Existing `tests/unit_test/fuel/utils/zip_utils_test.py` (16 tests) pass unchanged. - [x] black / isort / flake8 clean.
1 parent 959d92c commit 77b5934

1 file changed

Lines changed: 48 additions & 1 deletion

File tree

nvflare/fuel/utils/zip_utils.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414

1515
import io
1616
import os
17+
import stat
1718
from pathlib import Path
18-
from zipfile import ZipFile
19+
from zipfile import ZipFile, ZipInfo
1920

2021

2122
def normpath_for_zip(path):
@@ -140,6 +141,42 @@ def ls_zip_from_bytes(zip_data: bytes):
140141
return z.infolist()
141142

142143

144+
def _validate_zip_entry_name(name: str, real_out: str) -> None:
145+
"""Reject a zip entry name with an absolute path or ".." component."""
146+
if os.path.isabs(name) or name.startswith("/") or name.startswith("\\"):
147+
raise ValueError(f"unsafe absolute path in zip entry: {name!r}")
148+
if ".." in name.replace("\\", "/").split("/"):
149+
raise ValueError(f"unsafe '..' component in zip entry: {name!r}")
150+
resolved = os.path.realpath(os.path.join(real_out, name))
151+
if resolved != real_out and not resolved.startswith(real_out + os.sep):
152+
raise ValueError(f"zip entry escapes output dir: {name!r}")
153+
154+
155+
def _is_symlink_entry(info: ZipInfo) -> bool:
156+
"""Return True when the zip entry's stored Unix mode marks it as a symlink."""
157+
mode = info.external_attr >> 16
158+
return stat.S_ISLNK(mode)
159+
160+
161+
def _validate_zip_entries(z: ZipFile, output_dir_name: str) -> None:
162+
"""Reject zip entries with absolute paths, ".." components, or symlink mode.
163+
164+
Note: ``zipfile.ZipFile.extractall`` is already safe against path-traversal
165+
entries — CPython strips ".." components and absolute-path roots from
166+
member names before extraction (see the zipfile docs and CVE-2014-4616
167+
hardening), and CPython does not honor stored symlink modes during
168+
extraction. This pre-check is defense-in-depth: it rejects suspicious
169+
archives up front with a clear error rather than silently rewriting paths,
170+
and forbids symlink entries so that any future or non-CPython zipfile that
171+
*does* honor symlinks cannot be used to set up a TOCTOU traversal.
172+
"""
173+
real_out = os.path.realpath(output_dir_name)
174+
for info in z.infolist():
175+
if _is_symlink_entry(info):
176+
raise ValueError(f"unsafe symlink entry in zip: {info.filename!r}")
177+
_validate_zip_entry_name(info.filename, real_out)
178+
179+
143180
def unzip_single_file_from_bytes(zip_data: bytes, output_dir_name: str, file_path: str):
144181
"""Decompresses a zip and extracts single specified file to the specified output directory.
145182
@@ -148,6 +185,11 @@ def unzip_single_file_from_bytes(zip_data: bytes, output_dir_name: str, file_pat
148185
output_dir_name: the output directory for extracted content
149186
file_path: file path to file to unzip
150187
"""
188+
# Validate the requested file_path before any directory creation, so an
189+
# absolute / ".." file_path can't cause os.makedirs to touch a path
190+
# outside output_dir_name.
191+
_validate_zip_entry_name(file_path, os.path.realpath(output_dir_name))
192+
151193
path_to_file, _ = split_path(file_path)
152194
output_dir_name = os.path.join(output_dir_name, path_to_file)
153195
os.makedirs(output_dir_name)
@@ -158,6 +200,9 @@ def unzip_single_file_from_bytes(zip_data: bytes, output_dir_name: str, file_pat
158200
raise NotADirectoryError(f'"{output_dir_name}" is not a valid directory')
159201

160202
with ZipFile(io.BytesIO(zip_data), "r") as z:
203+
info = z.getinfo(file_path)
204+
if _is_symlink_entry(info):
205+
raise ValueError(f"unsafe symlink entry in zip: {file_path!r}")
161206
z.extract(file_path, path=output_dir_name)
162207

163208

@@ -175,6 +220,7 @@ def unzip_all_from_bytes(zip_data: bytes, output_dir_name: str):
175220
raise NotADirectoryError(f'"{output_dir_name}" is not a valid directory')
176221

177222
with ZipFile(io.BytesIO(zip_data), "r") as z:
223+
_validate_zip_entries(z, output_dir_name)
178224
z.extractall(output_dir_name)
179225

180226

@@ -192,4 +238,5 @@ def unzip_all_from_file(zip_file_path: str, output_dir_name: str):
192238
raise ValueError(f'zip file "{zip_file_path}" is not a valid file')
193239

194240
with ZipFile(zip_file_path, "r") as z:
241+
_validate_zip_entries(z, output_dir_name)
195242
z.extractall(output_dir_name)

0 commit comments

Comments
 (0)