Skip to content

Commit b3a60f5

Browse files
Copilotmnriem
andauthored
Improve tarball extraction security and cleanup logic
Agent-Logs-Url: https://github.com/github/spec-kit/sessions/9fb9a8ea-0967-4baf-b95c-7101e423ff58 Co-authored-by: mnriem <15701806+mnriem@users.noreply.github.com>
1 parent b37f117 commit b3a60f5

2 files changed

Lines changed: 15 additions & 7 deletions

File tree

src/specify_cli/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3643,6 +3643,7 @@ def extension_add(
36433643
download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads"
36443644
download_dir.mkdir(parents=True, exist_ok=True)
36453645
archive_fmt = _detect_archive_format(from_url)
3646+
archive_path = None
36463647

36473648
try:
36483649
with urllib.request.urlopen(from_url, timeout=60) as response:
@@ -3661,11 +3662,9 @@ def extension_add(
36613662
console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}")
36623663
raise typer.Exit(1)
36633664
finally:
3664-
# Clean up downloaded archive
3665-
for _suffix in (".zip", ".tar.gz"):
3666-
_p = download_dir / f"{extension}-url-download{_suffix}"
3667-
if _p.exists():
3668-
_p.unlink()
3665+
# Clean up the downloaded archive
3666+
if archive_path is not None and archive_path.exists():
3667+
archive_path.unlink()
36693668

36703669
else:
36713670
# Try bundled extensions first (shipped with spec-kit)

src/specify_cli/extensions.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,11 @@ def _safe_extract_tarball(
155155
(devices, FIFOs, etc.) are rejected.
156156
157157
On Python 3.12 and later the ``"data"`` extraction filter is applied
158-
for an additional layer of OS-level protection.
158+
for an additional layer of OS-level protection. On earlier versions
159+
the explicit member list (containing only pre-validated regular files
160+
and directories) is passed to ``extractall()`` — since all symlinks are
161+
already rejected in the validation phase, no archive-introduced symlink
162+
can be followed during extraction.
159163
160164
Args:
161165
archive_path: Path to the ``.tar.gz``/``.tgz`` archive.
@@ -169,6 +173,7 @@ def _safe_extract_tarball(
169173

170174
with tarfile.open(archive_path, "r:gz") as tf:
171175
members = tf.getmembers()
176+
safe_members = []
172177

173178
# Validate every member before extracting anything.
174179
for member in members:
@@ -201,11 +206,15 @@ def _safe_extract_tarball(
201206
f"Non-regular file in archive: {member.name}"
202207
)
203208

209+
safe_members.append(member)
210+
204211
# Extract — use the "data" filter on Python 3.12+ for extra hardening.
212+
# On older versions pass only the pre-validated members so that no
213+
# unvetted entry (added concurrently or via a race) slips through.
205214
if sys.version_info >= (3, 12):
206215
tf.extractall(dest_dir, filter="data") # type: ignore[call-arg]
207216
else:
208-
tf.extractall(dest_dir) # noqa: S202 — validated manually above
217+
tf.extractall(dest_dir, members=safe_members) # noqa: S202 — validated above
209218

210219

211220
@dataclass

0 commit comments

Comments
 (0)