Skip to content

Commit 369587b

Browse files
test: add comprehensive tests for image optimization and proxy functionality
- Implement tests for pngquant fallback behavior in image optimization - Add CLI command tests for thumbnail and process commands - Create tests for HTML proxy endpoint, including URL validation and security checks - Introduce tests for custom SQLAlchemy types in core.database.types
1 parent 2df734c commit 369587b

File tree

3 files changed

+697
-0
lines changed

3 files changed

+697
-0
lines changed

tests/unit/api/test_proxy.py

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
"""
2+
Tests for api/routers/proxy.py - HTML proxy endpoint.
3+
4+
Tests URL validation, security checks, and HTML injection.
5+
"""
6+
7+
from unittest.mock import AsyncMock, patch
8+
9+
import httpx
10+
import pytest
11+
from fastapi.testclient import TestClient
12+
13+
from api.main import app
14+
from api.routers.proxy import ALLOWED_BUCKET, ALLOWED_HOST, SIZE_REPORTER_SCRIPT, build_safe_gcs_url
15+
16+
17+
@pytest.fixture
18+
def client() -> TestClient:
19+
"""Create a test client for the FastAPI app."""
20+
return TestClient(app)
21+
22+
23+
class TestBuildSafeGcsUrl:
24+
"""Tests for build_safe_gcs_url() security function."""
25+
26+
def test_valid_gcs_url(self):
27+
"""Valid GCS URL should be reconstructed."""
28+
url = "https://storage.googleapis.com/pyplots-images/plots/scatter-basic/matplotlib/plot.html"
29+
result = build_safe_gcs_url(url)
30+
assert result == url
31+
32+
def test_valid_gcs_url_with_subdirectories(self):
33+
"""Valid GCS URL with multiple subdirectories."""
34+
url = "https://storage.googleapis.com/pyplots-images/staging/scatter-basic/matplotlib/plot.html"
35+
result = build_safe_gcs_url(url)
36+
assert result == url
37+
38+
def test_http_url_rejected(self):
39+
"""HTTP (non-HTTPS) URLs should be rejected."""
40+
url = "http://storage.googleapis.com/pyplots-images/plots/scatter-basic/plot.html"
41+
result = build_safe_gcs_url(url)
42+
assert result is None
43+
44+
def test_wrong_host_rejected(self):
45+
"""URLs with wrong host should be rejected."""
46+
url = "https://evil.com/pyplots-images/plots/scatter-basic/plot.html"
47+
result = build_safe_gcs_url(url)
48+
assert result is None
49+
50+
def test_subdomain_rejected(self):
51+
"""URLs with subdomain of allowed host should be rejected."""
52+
url = "https://evil.storage.googleapis.com/pyplots-images/plots/plot.html"
53+
result = build_safe_gcs_url(url)
54+
assert result is None
55+
56+
def test_wrong_bucket_rejected(self):
57+
"""URLs with wrong bucket should be rejected."""
58+
url = "https://storage.googleapis.com/other-bucket/plots/scatter-basic/plot.html"
59+
result = build_safe_gcs_url(url)
60+
assert result is None
61+
62+
def test_path_traversal_rejected(self):
63+
"""Path traversal attempts should be rejected."""
64+
url = "https://storage.googleapis.com/pyplots-images/../other-bucket/plot.html"
65+
result = build_safe_gcs_url(url)
66+
assert result is None
67+
68+
def test_double_dot_in_path_rejected(self):
69+
"""Double dots anywhere in path should be rejected."""
70+
url = "https://storage.googleapis.com/pyplots-images/plots/..hidden/plot.html"
71+
result = build_safe_gcs_url(url)
72+
assert result is None
73+
74+
def test_short_path_rejected(self):
75+
"""Paths without file should be rejected."""
76+
url = "https://storage.googleapis.com/pyplots-images"
77+
result = build_safe_gcs_url(url)
78+
assert result is None
79+
80+
def test_bucket_only_rejected(self):
81+
"""Bucket without path should be rejected."""
82+
url = "https://storage.googleapis.com/pyplots-images/"
83+
result = build_safe_gcs_url(url)
84+
assert result is None
85+
86+
def test_invalid_characters_rejected(self):
87+
"""Paths with invalid characters should be rejected."""
88+
# Semicolon is not allowed
89+
url = "https://storage.googleapis.com/pyplots-images/plots/test;rm+-rf/plot.html"
90+
result = build_safe_gcs_url(url)
91+
assert result is None
92+
93+
def test_space_in_path_rejected(self):
94+
"""Paths with spaces should be rejected."""
95+
url = "https://storage.googleapis.com/pyplots-images/plots/test plot/plot.html"
96+
result = build_safe_gcs_url(url)
97+
assert result is None
98+
99+
def test_query_string_stripped(self):
100+
"""Query strings should be stripped (not in reconstructed URL)."""
101+
url = "https://storage.googleapis.com/pyplots-images/plots/scatter/plot.html?token=abc"
102+
result = build_safe_gcs_url(url)
103+
# Query is stripped by urlparse when we only use path
104+
assert result == "https://storage.googleapis.com/pyplots-images/plots/scatter/plot.html"
105+
106+
def test_valid_characters_allowed(self):
107+
"""Alphanumeric, hyphens, underscores, dots, slashes, plus are allowed."""
108+
url = "https://storage.googleapis.com/pyplots-images/plots/scatter-basic_v2/plot+extra.html"
109+
result = build_safe_gcs_url(url)
110+
assert result == url
111+
112+
def test_empty_url_rejected(self):
113+
"""Empty URL should be rejected."""
114+
result = build_safe_gcs_url("")
115+
assert result is None
116+
117+
def test_malformed_url_rejected(self):
118+
"""Malformed URL should be rejected."""
119+
result = build_safe_gcs_url("not-a-url")
120+
assert result is None
121+
122+
def test_fragment_stripped(self):
123+
"""URL fragments should be stripped."""
124+
url = "https://storage.googleapis.com/pyplots-images/plots/scatter/plot.html#section"
125+
result = build_safe_gcs_url(url)
126+
assert result == "https://storage.googleapis.com/pyplots-images/plots/scatter/plot.html"
127+
128+
129+
class TestProxyHtmlEndpoint:
130+
"""Tests for /proxy/html endpoint."""
131+
132+
def test_invalid_url_returns_400(self, client):
133+
"""Invalid URL should return 400 error."""
134+
response = client.get("/proxy/html", params={"url": "http://evil.com/page.html"})
135+
assert response.status_code == 400
136+
assert ALLOWED_HOST in response.json()["message"]
137+
138+
def test_wrong_bucket_returns_400(self, client):
139+
"""Wrong bucket should return 400 error."""
140+
response = client.get("/proxy/html", params={"url": "https://storage.googleapis.com/other-bucket/plot.html"})
141+
assert response.status_code == 400
142+
143+
@patch("api.routers.proxy.httpx.AsyncClient")
144+
def test_valid_url_injects_script_before_body(self, mock_client_class, client):
145+
"""Valid URL should inject script before </body>."""
146+
mock_response = AsyncMock()
147+
mock_response.text = "<html><body><h1>Test</h1></body></html>"
148+
mock_response.raise_for_status = lambda: None # Sync method, no-op
149+
150+
mock_client = AsyncMock()
151+
mock_client.get = AsyncMock(return_value=mock_response)
152+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
153+
mock_client.__aexit__ = AsyncMock(return_value=None)
154+
mock_client_class.return_value = mock_client
155+
156+
response = client.get(
157+
"/proxy/html", params={"url": "https://storage.googleapis.com/pyplots-images/plots/test/plot.html"}
158+
)
159+
160+
assert response.status_code == 200
161+
assert "pyplots-size" in response.text
162+
assert SIZE_REPORTER_SCRIPT.strip() in response.text
163+
assert response.text.index("<script>") < response.text.index("</body>")
164+
165+
@patch("api.routers.proxy.httpx.AsyncClient")
166+
def test_valid_url_injects_script_before_html_if_no_body(self, mock_client_class, client):
167+
"""If no </body>, inject before </html>."""
168+
mock_response = AsyncMock()
169+
mock_response.text = "<html><h1>Test</h1></html>"
170+
mock_response.raise_for_status = lambda: None # Sync method, no-op
171+
172+
mock_client = AsyncMock()
173+
mock_client.get = AsyncMock(return_value=mock_response)
174+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
175+
mock_client.__aexit__ = AsyncMock(return_value=None)
176+
mock_client_class.return_value = mock_client
177+
178+
response = client.get(
179+
"/proxy/html", params={"url": "https://storage.googleapis.com/pyplots-images/plots/test/plot.html"}
180+
)
181+
182+
assert response.status_code == 200
183+
assert "pyplots-size" in response.text
184+
assert response.text.index("<script>") < response.text.index("</html>")
185+
186+
@patch("api.routers.proxy.httpx.AsyncClient")
187+
def test_valid_url_appends_script_if_no_body_or_html(self, mock_client_class, client):
188+
"""If no </body> or </html>, append to end."""
189+
mock_response = AsyncMock()
190+
mock_response.text = "<div>Just content</div>"
191+
mock_response.raise_for_status = lambda: None # Sync method, no-op
192+
193+
mock_client = AsyncMock()
194+
mock_client.get = AsyncMock(return_value=mock_response)
195+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
196+
mock_client.__aexit__ = AsyncMock(return_value=None)
197+
mock_client_class.return_value = mock_client
198+
199+
response = client.get(
200+
"/proxy/html", params={"url": "https://storage.googleapis.com/pyplots-images/plots/test/plot.html"}
201+
)
202+
203+
assert response.status_code == 200
204+
assert response.text.endswith("</script>\n")
205+
206+
@patch("api.routers.proxy.httpx.AsyncClient")
207+
def test_http_error_from_upstream(self, mock_client_class, client):
208+
"""HTTP error from upstream should return appropriate status."""
209+
# Create a proper mock response for HTTPStatusError
210+
mock_request = httpx.Request("GET", "https://storage.googleapis.com/test")
211+
mock_http_response = httpx.Response(404, request=mock_request)
212+
213+
mock_response = AsyncMock()
214+
mock_response.status_code = 404
215+
216+
def raise_status():
217+
raise httpx.HTTPStatusError("Not found", request=mock_request, response=mock_http_response)
218+
219+
mock_response.raise_for_status = raise_status
220+
221+
mock_client = AsyncMock()
222+
mock_client.get = AsyncMock(return_value=mock_response)
223+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
224+
mock_client.__aexit__ = AsyncMock(return_value=None)
225+
mock_client_class.return_value = mock_client
226+
227+
response = client.get(
228+
"/proxy/html", params={"url": "https://storage.googleapis.com/pyplots-images/plots/test/plot.html"}
229+
)
230+
231+
assert response.status_code == 404
232+
assert "Failed to fetch HTML" in response.json()["message"]
233+
234+
@patch("api.routers.proxy.httpx.AsyncClient")
235+
def test_connection_error_returns_502(self, mock_client_class, client):
236+
"""Connection error should return 502."""
237+
mock_request = httpx.Request("GET", "https://storage.googleapis.com/test")
238+
239+
mock_client = AsyncMock()
240+
mock_client.get = AsyncMock(side_effect=httpx.RequestError("Connection failed", request=mock_request))
241+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
242+
mock_client.__aexit__ = AsyncMock(return_value=None)
243+
mock_client_class.return_value = mock_client
244+
245+
response = client.get(
246+
"/proxy/html", params={"url": "https://storage.googleapis.com/pyplots-images/plots/test/plot.html"}
247+
)
248+
249+
assert response.status_code == 502
250+
assert "Failed to connect to storage" in response.json()["message"]
251+
252+
253+
class TestSizeReporterScript:
254+
"""Tests for the injected SIZE_REPORTER_SCRIPT."""
255+
256+
def test_script_uses_specific_origin(self):
257+
"""Script should use specific origin, not wildcard."""
258+
assert "'*'" not in SIZE_REPORTER_SCRIPT
259+
assert "https://pyplots.ai" in SIZE_REPORTER_SCRIPT
260+
261+
def test_script_sends_pyplots_size_message(self):
262+
"""Script should send pyplots-size message type."""
263+
assert "pyplots-size" in SIZE_REPORTER_SCRIPT
264+
265+
def test_script_reports_width_and_height(self):
266+
"""Script should report width and height."""
267+
assert "width:" in SIZE_REPORTER_SCRIPT
268+
assert "height:" in SIZE_REPORTER_SCRIPT
269+
270+
271+
class TestConstants:
272+
"""Tests for module constants."""
273+
274+
def test_allowed_host(self):
275+
"""ALLOWED_HOST should be GCS domain."""
276+
assert ALLOWED_HOST == "storage.googleapis.com"
277+
278+
def test_allowed_bucket(self):
279+
"""ALLOWED_BUCKET should be pyplots-images."""
280+
assert ALLOWED_BUCKET == "pyplots-images"

0 commit comments

Comments
 (0)