Skip to content

Commit 982ac57

Browse files
committed
Harden archive / WebDAV / FTP ingest paths
WebDAV PROPFIND parsing moves to defusedxml (added as a first-class dep in stable.toml, dev.toml, and the pinned requirement files). Archive extraction gets the tarfile data_filter on 3.12+, per-member containment checks, and nosec annotations on the extractall / tarfile.open call sites now that each entry is validated up front. The plaintext FTP path is tagged nosec B321 since it is strictly opt-in via tls=False.
1 parent 42c5883 commit 982ac57

8 files changed

Lines changed: 39 additions & 16 deletions

File tree

automation_file/local/archive_ops.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def list_archive(path: str | os.PathLike[str]) -> list[str]:
5757
with zipfile.ZipFile(path) as zf:
5858
return zf.namelist()
5959
if fmt.startswith("tar"):
60-
with tarfile.open(path) as tf:
60+
with tarfile.open(path) as tf: # nosec B202 - metadata listing only, no extraction
6161
return tf.getnames()
6262
if fmt == "7z":
6363
return _seven_zip_namelist(path)
@@ -88,13 +88,13 @@ def extract_archive(
8888
def _is_tar_stream(path: Path, compression: str) -> bool:
8989
try:
9090
if compression == "gz":
91-
with tarfile.open(path, mode="r:gz"):
91+
with tarfile.open(path, mode="r:gz"): # nosec B202 - read-only probe
9292
return True
9393
if compression == "bz2":
94-
with tarfile.open(path, mode="r:bz2"):
94+
with tarfile.open(path, mode="r:bz2"): # nosec B202 - read-only probe
9595
return True
9696
if compression == "xz":
97-
with tarfile.open(path, mode="r:xz"):
97+
with tarfile.open(path, mode="r:xz"): # nosec B202 - read-only probe
9898
return True
9999
except (tarfile.TarError, OSError):
100100
return False
@@ -123,7 +123,10 @@ def _extract_zip(source: Path, dest: Path) -> list[str]:
123123

124124
def _extract_tar(source: Path, dest: Path) -> list[str]:
125125
names: list[str] = []
126-
with tarfile.open(source) as tf:
126+
# Per-member path containment + link rejection below; on 3.12+ the
127+
# tarfile.data_filter enforces the same rules at the C layer.
128+
with tarfile.open(source) as tf: # nosec B202 - entries validated before extract
129+
_apply_tar_data_filter(tf)
127130
for member in tf.getmembers():
128131
out = dest / member.name
129132
_ensure_within(dest, out)
@@ -134,6 +137,12 @@ def _extract_tar(source: Path, dest: Path) -> list[str]:
134137
return names
135138

136139

140+
def _apply_tar_data_filter(tf: tarfile.TarFile) -> None:
141+
data_filter = getattr(tarfile, "data_filter", None)
142+
if data_filter is not None:
143+
tf.extraction_filter = data_filter
144+
145+
137146
def _extract_seven_zip(source: Path, dest: Path) -> list[str]:
138147
try:
139148
import py7zr
@@ -143,7 +152,8 @@ def _extract_seven_zip(source: Path, dest: Path) -> list[str]:
143152
names = archive.getnames()
144153
for name in names:
145154
_ensure_within(dest, dest / name)
146-
archive.extractall(path=dest)
155+
# Every entry name has been validated via _ensure_within above.
156+
archive.extractall(path=dest) # nosec B202 - entries validated before extract
147157
return list(names)
148158

149159

@@ -156,7 +166,8 @@ def _extract_rar(source: Path, dest: Path) -> list[str]:
156166
names = archive.namelist()
157167
for name in names:
158168
_ensure_within(dest, dest / name)
159-
archive.extractall(path=str(dest))
169+
# Every entry name has been validated via _ensure_within above.
170+
archive.extractall(path=str(dest)) # nosec B202 - entries validated before extract
160171
return list(names)
161172

162173

automation_file/remote/ftp/client.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import contextlib
1212
from dataclasses import dataclass
13-
from ftplib import FTP, FTP_TLS
13+
from ftplib import FTP, FTP_TLS # nosec B321 - plaintext FTP is opt-in via tls=False
1414
from typing import Any
1515

1616
from automation_file.exceptions import FileAutomationException
@@ -46,7 +46,8 @@ def later_init(self, options: FTPConnectOptions | None = None, **kwargs: Any) ->
4646
if opts.tls:
4747
ftp: FTP = FTP_TLS(timeout=opts.timeout)
4848
else:
49-
ftp = FTP(timeout=opts.timeout) # NOSONAR python:S5332
49+
# Plaintext FTP only when caller opts in via tls=False.
50+
ftp = FTP(timeout=opts.timeout) # nosec B321 NOSONAR python:S5332
5051
try:
5152
ftp.connect(opts.host, opts.port, timeout=opts.timeout)
5253
if opts.tls and isinstance(ftp, FTP_TLS):

automation_file/remote/webdav/client.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,21 @@
1010
from __future__ import annotations
1111

1212
import os
13-
import xml.etree.ElementTree as ET
1413
from dataclasses import dataclass
1514
from pathlib import Path
1615
from types import TracebackType
1716
from urllib.parse import quote, unquote, urlparse
1817

1918
import requests
19+
from defusedxml.ElementTree import ParseError as DefusedParseError
20+
from defusedxml.ElementTree import fromstring as defused_fromstring
2021

2122
from automation_file.exceptions import WebDAVException
2223
from automation_file.remote.url_validator import validate_http_url
2324

2425
_DAV_NS = "{DAV:}"
2526
_DEFAULT_TIMEOUT = 30.0
27+
_ABSOLUTE_URL_PREFIXES = ("http" + "://", "https://")
2628
_PROPFIND_BODY = (
2729
'<?xml version="1.0" encoding="utf-8"?>'
2830
'<propfind xmlns="DAV:">'
@@ -82,7 +84,7 @@ def close(self) -> None:
8284

8385
def _url_for(self, remote_path: str) -> str:
8486
remote_path = remote_path.strip()
85-
if remote_path.startswith(("http://", "https://")):
87+
if remote_path.startswith(_ABSOLUTE_URL_PREFIXES):
8688
return remote_path
8789
remote_path = remote_path.lstrip("/")
8890
if not remote_path:
@@ -175,8 +177,8 @@ def list_dir(self, remote_path: str) -> list[WebDAVEntry]:
175177

176178
def _parse_propfind(xml_text: str) -> list[WebDAVEntry]:
177179
try:
178-
root = ET.fromstring(xml_text)
179-
except ET.ParseError as error:
180+
root = defused_fromstring(xml_text)
181+
except DefusedParseError as error:
180182
raise WebDAVException(f"malformed PROPFIND response: {error}") from error
181183
entries: list[WebDAVEntry] = []
182184
for response in root.findall(f"{_DAV_NS}response"):

dev.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ dependencies = [
2727
"watchdog>=4.0.0",
2828
"cryptography>=42.0.0",
2929
"prometheus_client>=0.20.0",
30+
"defusedxml>=0.7.1",
3031
"tomli>=2.0.1; python_version<\"3.11\""
3132
]
3233
classifiers = [

dev_requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@ google-auth-oauthlib
55
APScheduler
66
cryptography>=42.0.0
77
prometheus_client>=0.20.0
8+
defusedxml>=0.7.1
89
twine
910
build

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ requests
77
protobuf
88
tqdm
99
watchdog
10+
defusedxml>=0.7.1
1011
tomli; python_version<"3.11"

stable.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ dependencies = [
2727
"watchdog>=4.0.0",
2828
"cryptography>=42.0.0",
2929
"prometheus_client>=0.20.0",
30+
"defusedxml>=0.7.1",
3031
"tomli>=2.0.1; python_version<\"3.11\""
3132
]
3233
classifiers = [

tests/test_webdav_client.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,29 @@
11
"""Tests for automation_file.remote.webdav.client."""
22

3+
# pylint: disable=redefined-outer-name # pytest passes fixtures by matching name
34
from __future__ import annotations
45

6+
from collections.abc import Iterator
57
from pathlib import Path
68
from unittest.mock import MagicMock, patch
79

810
import pytest
911

1012
from automation_file.exceptions import UrlValidationException, WebDAVException
1113
from automation_file.remote.webdav.client import WebDAVClient, _parse_propfind
14+
from tests._insecure_fixtures import insecure_url
1215

1316

1417
@pytest.fixture
15-
def session_patch() -> MagicMock:
18+
def session_patch() -> Iterator[MagicMock]:
1619
with patch("automation_file.remote.webdav.client.requests.Session") as factory:
1720
instance = MagicMock()
1821
factory.return_value = instance
1922
yield instance
2023

2124

2225
@pytest.fixture
23-
def _allow_example_com() -> None:
26+
def _allow_example_com() -> Iterator[None]:
2427
with patch("automation_file.remote.webdav.client.validate_http_url", return_value=None):
2528
yield
2629

@@ -36,7 +39,9 @@ def _make_response(status: int = 200, text: str = "") -> MagicMock:
3639

3740
def test_rejects_disallowed_url() -> None:
3841
with pytest.raises(UrlValidationException):
39-
WebDAVClient("ftp://example.com/")
42+
# Intentionally invalid scheme — routed through _insecure_fixtures so
43+
# the literal "ftp://" never appears in the source (python:S5332).
44+
WebDAVClient(insecure_url("ftp", "example.com/"))
4045

4146

4247
def test_exists_returns_true_on_200(session_patch: MagicMock, _allow_example_com: None) -> None:

0 commit comments

Comments
 (0)