Skip to content

Commit fd8da6a

Browse files
committed
Address 16th PR review: real redirect-rebind test, faithful raise_for_status
All test/doc-only (production logic unchanged): - Finding 1 (genuine gap): the pre-existing test_redirect_to_private_ip_rejected used Location https://127.0.0.1/, which is rejected by the ALLOWLIST check (127.0.0.1 not allowlisted) before _assert_public_ip ever runs — so the actual threat (an allowlisted .gov host that resolves private on a redirect hop) had no coverage. Renamed that test to ..._rejected_by_allowlist (with a match= "allowlist" assertion) and added test_redirect_to_allowlisted_host_resolving_ private_rejected, which 302-redirects to an allowlisted host (ecfr.gov) whose DNS resolves to 127.0.0.1 and asserts the rejection comes from _assert_public_ip (match="non-public"). - Finding 3 (premise was wrong, but test fidelity improved): httpx raise_for_status DOES raise for any non-2xx incl. 3xx (verified 301/304/302), so a 301-without-Location raises, not "silently returns a body". _mock_stream now mirrors that (raise for non-2xx), and the 304 test is rewritten + para- metrized over 301/304 to assert it raises HTTPStatusError rather than the prior mocked body-return artifact. - Finding 2: promoted the params-drop-on-redirect constraint into safe_fetch_bytes' docstring (was inline-comment only) so callers reading the signature see it. - Finding 4 (strip-list vs keep-allowlist) declined: the reviewer flags it as an out-of-scope Phase-5 architectural note; the limitation is already documented.
1 parent b8c0344 commit fd8da6a

2 files changed

Lines changed: 65 additions & 24 deletions

File tree

opencontractserver/tests/test_safe_http.py

Lines changed: 59 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from unittest.mock import MagicMock, patch
1414
from urllib.parse import urlparse
1515

16+
import httpx
1617
import pytest
1718

1819
from opencontractserver.constants.safe_http import MAX_REDIRECTS
@@ -80,7 +81,17 @@ def _mock_stream(status_code: int, body: bytes = b"", headers: dict | None = Non
8081
)
8182
resp.headers = MagicMock()
8283
resp.headers.get = lambda k, default=None: hdr_dict.get(k.lower(), default)
83-
resp.raise_for_status = MagicMock()
84+
85+
def _raise_for_status():
86+
# Faithful to httpx: raise for ANY non-2xx (including a 3xx that was not
87+
# followed as a redirect, e.g. a malformed 301 with no Location), no-op
88+
# for 2xx. Redirect hops never reach this — they ``continue`` first.
89+
if not 200 <= status_code < 300:
90+
raise httpx.HTTPStatusError(
91+
f"{status_code}", request=MagicMock(), response=resp
92+
)
93+
94+
resp.raise_for_status = _raise_for_status
8495

8596
def iter_bytes():
8697
yield body
@@ -339,26 +350,25 @@ def _stream_dispatch(self_client, method, url, **kwargs):
339350
with pytest.raises(SSRFValidationError, match="empty Location"):
340351
safe_fetch_bytes(ALLOWED_URL)
341352

342-
def test_non_location_3xx_not_followed_as_redirect(self):
343-
"""A 3xx without a Location (e.g. 304) is NOT treated as a redirect.
353+
@pytest.mark.parametrize("status_code", [301, 304])
354+
def test_non_location_3xx_not_looped_but_raises(self, status_code):
355+
"""A 3xx WITHOUT a Location is not looped — it falls through to raise_for_status.
344356
345-
httpx reports ``is_redirect=True`` for any 3xx including 304; keying off
346-
``has_redirect_location`` instead means a 304 falls through to normal
347-
handling rather than looping. Here the 304 carries a body, which is
348-
returned (raise_for_status is a MagicMock no-op for the 3xx).
357+
httpx reports ``is_redirect=True`` for any 3xx (including 304/a malformed
358+
301 with no Location); keying off ``has_redirect_location`` means such a
359+
response is NOT followed as a redirect. It then reaches ``raise_for_status``,
360+
which raises for any non-2xx — so the function raises ``HTTPStatusError``
361+
rather than looping to the redirect cap (the old ``is_redirect`` behaviour)
362+
or silently returning a body.
349363
"""
350364

351365
def _stream_dispatch(self_client, method, url, **kwargs):
352-
# The body here is a test artifact to prove we reach the return path;
353-
# a real 304 carries no body (RFC 9110 §15.4.5) and raise_for_status
354-
# (a MagicMock no-op here) does not raise for a 3xx in httpx.
355-
return _mock_stream(304, b"not-modified-body") # no Location header
366+
return _mock_stream(status_code, b"") # no Location header
356367

357368
with patch("socket.getaddrinfo", side_effect=_fake_getaddrinfo_public):
358369
with patch("httpx.Client.stream", _stream_dispatch):
359-
body, host = safe_fetch_bytes(ALLOWED_URL)
360-
assert body == b"not-modified-body"
361-
assert host == ALLOWED_HOST
370+
with pytest.raises(httpx.HTTPStatusError):
371+
safe_fetch_bytes(ALLOWED_URL)
362372

363373
def test_capital_location_header_is_followed(self):
364374
"""A capital-L ``Location`` (the conventional casing) is honoured.
@@ -382,25 +392,50 @@ def _stream_dispatch(self_client, method, url, **kwargs):
382392
assert body == b"final"
383393
assert call_count == 2, "the capital-L redirect must actually be followed"
384394

385-
def test_redirect_to_private_ip_rejected(self):
386-
"""
387-
First hop: allowlisted host → 302 → Location: https://127.0.0.1/
388-
The redirect target must fail IP validation (SSRFValidationError).
395+
def test_redirect_to_private_ip_literal_rejected_by_allowlist(self):
396+
"""A redirect to a private-IP *literal* (``https://127.0.0.1/``) is rejected.
397+
398+
Note this is caught at the ALLOWLIST layer: ``127.0.0.1`` is not in
399+
``PUBLIC_DOMAIN_SOURCE_HOSTS`` so ``validate_url`` rejects it before
400+
``_assert_public_ip`` runs. The IP layer for an *allowlisted* host that
401+
resolves private is exercised separately by
402+
``test_redirect_to_allowlisted_host_resolving_private_rejected``.
389403
"""
390404
redirect_target = "https://127.0.0.1/"
391405

392-
call_count = 0
393-
394406
def _stream_dispatch(self_client, method, url, **kwargs):
395-
nonlocal call_count
396-
call_count += 1
397-
if call_count == 1:
407+
if urlparse(str(url)).hostname == ALLOWED_HOST:
398408
return _mock_stream(302, b"", {"location": redirect_target})
399409
return _mock_stream(200, b"ok")
400410

401411
with patch("socket.getaddrinfo", side_effect=_fake_getaddrinfo_public):
402412
with patch("httpx.Client.stream", _stream_dispatch):
403-
with pytest.raises(SSRFValidationError):
413+
with pytest.raises(SSRFValidationError, match="allowlist"):
414+
safe_fetch_bytes(ALLOWED_URL)
415+
416+
def test_redirect_to_allowlisted_host_resolving_private_rejected(self):
417+
"""The real threat: a redirect to an ALLOWLISTED host that DNS-resolves to a
418+
private IP must be rejected by the IP check, not just the allowlist.
419+
420+
Hop 1 (uscode.house.gov) resolves public and 302-redirects to another
421+
allowlisted host (ecfr.gov); that host then resolves to a private IP. The
422+
rejection must come from ``_assert_public_ip`` (``match="non-public"``),
423+
which is exactly the redirect-hop rebind surface this guard exists for.
424+
"""
425+
426+
def _getaddrinfo(host, port, *args, **kwargs):
427+
# The redirect target (ecfr.gov) resolves private; hop-1 host public.
428+
ip = "127.0.0.1" if "ecfr" in host else "1.1.1.1"
429+
return [(socket.AF_INET, 1, 6, "", (ip, 0))]
430+
431+
def _stream_dispatch(self_client, method, url, **kwargs):
432+
if urlparse(str(url)).hostname == ALLOWED_HOST:
433+
return _mock_stream(302, b"", {"location": "https://www.ecfr.gov/x"})
434+
return _mock_stream(200, b"ok")
435+
436+
with patch("socket.getaddrinfo", side_effect=_getaddrinfo):
437+
with patch("httpx.Client.stream", _stream_dispatch):
438+
with pytest.raises(SSRFValidationError, match="non-public"):
404439
safe_fetch_bytes(ALLOWED_URL)
405440

406441
def test_redirect_to_non_allowlisted_host_rejected(self):

opencontractserver/utils/safe_http.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,12 @@ def safe_fetch_bytes(
184184
- Streams the body and aborts past *max_bytes* (Content-Length AND actual bytes).
185185
- Enforces connect + read timeouts via the module-level ``_DEFAULT_TIMEOUT``.
186186
187+
Caller params note: *params* are forwarded only on the INITIAL request. On any
188+
redirect (same-host or cross-host) the redirect Location is the authoritative
189+
next URL, so *params* are NOT re-appended — a caller whose params are a
190+
required filter (e.g. the eCFR section/part filter) is relying on that endpoint
191+
not redirecting.
192+
187193
Caller headers note: on a cross-host redirect only the STANDARD credential
188194
headers (``CROSS_HOST_STRIPPED_HEADERS``) are stripped. Do NOT pass a
189195
non-standard per-service credential header (``X-Api-Key``, ``X-Auth-Token``,

0 commit comments

Comments
 (0)