11"""HTML proxy endpoint for interactive plots with size reporting."""
22
3+ from urllib .parse import urlparse
4+
35import httpx
46from fastapi import APIRouter , HTTPException
57from fastapi .responses import HTMLResponse
810router = APIRouter (tags = ["proxy" ])
911
1012# Script injected to report content size to parent window
13+ # Uses specific origin (pyplots.ai) for postMessage security
1114SIZE_REPORTER_SCRIPT = """
1215<script>
1316(function() {
1417 function reportSize() {
15- // Find the main content element (try common patterns for different libraries)
16- var content = document.querySelector(
17- '.bk-root, .vega-embed, .plotly, .chart-container, #container, .lp-plot, svg, canvas'
18- ) || document.body.firstElementChild || document.body;
19-
20- // Get actual rendered size
21- var rect = content.getBoundingClientRect();
22- var width = Math.max(rect.width, content.scrollWidth || 0, document.body.scrollWidth || 0);
23- var height = Math.max(rect.height, content.scrollHeight || 0, document.body.scrollHeight || 0);
24-
25- // Add padding to account for action buttons, toolbars, and other UI elements
26- var padding = 40;
27- width += padding;
28- height += padding;
29-
30- // Send to parent
31- if (width > 0 && height > 0) {
32- window.parent.postMessage({
33- type: 'pyplots-size',
34- width: Math.ceil(width),
35- height: Math.ceil(height)
36- }, '*');
18+ try {
19+ // Find the main content element (try common patterns for different libraries)
20+ var content = document.querySelector(
21+ '.bk-root, .vega-embed, .plotly, .chart-container, #container, .lp-plot, svg, canvas'
22+ ) || document.body.firstElementChild || document.body;
23+
24+ // Get actual rendered size
25+ var rect = content.getBoundingClientRect();
26+ var width = Math.max(rect.width, content.scrollWidth || 0, document.body.scrollWidth || 0);
27+ var height = Math.max(rect.height, content.scrollHeight || 0, document.body.scrollHeight || 0);
28+
29+ // Add padding to account for action buttons, toolbars, and other UI elements
30+ var padding = 40;
31+ width += padding;
32+ height += padding;
33+
34+ // Send to parent with specific origin for security
35+ if (width > 0 && height > 0 && window.parent !== window) {
36+ window.parent.postMessage({
37+ type: 'pyplots-size',
38+ width: Math.ceil(width),
39+ height: Math.ceil(height)
40+ }, 'https://pyplots.ai');
41+ }
42+ } catch (e) {
43+ // Silently fail if postMessage is blocked
3744 }
3845 }
3946
5865ALLOWED_BUCKET = "pyplots-images"
5966
6067
68+ def validate_gcs_url (url : str ) -> bool :
69+ """Validate that URL is from allowed GCS bucket with no path traversal."""
70+ try :
71+ parsed = urlparse (url )
72+ # Must be HTTPS
73+ if parsed .scheme != "https" :
74+ return False
75+ # Must be exact host (no subdomains)
76+ if parsed .netloc != ALLOWED_HOST :
77+ return False
78+ # Path must start with bucket name and not contain traversal
79+ path_parts = parsed .path .strip ("/" ).split ("/" )
80+ if len (path_parts ) < 2 :
81+ return False
82+ if path_parts [0 ] != ALLOWED_BUCKET :
83+ return False
84+ # Check for path traversal attempts
85+ if ".." in parsed .path :
86+ return False
87+ return True
88+ except Exception :
89+ return False
90+
91+
6192@router .get ("/proxy/html" , response_class = HTMLResponse )
6293async def proxy_html (url : str ):
6394 """
@@ -74,12 +105,12 @@ async def proxy_html(url: str):
74105 Returns:
75106 Modified HTML with size reporting script injected
76107 """
77- # Security: Only allow URLs from our GCS bucket
78- if not url . startswith ( f"https:// { ALLOWED_HOST } / { ALLOWED_BUCKET } /" ):
108+ # Security: Validate URL strictly to prevent SSRF and path traversal
109+ if not validate_gcs_url ( url ):
79110 raise HTTPException (status_code = 400 , detail = f"Only URLs from { ALLOWED_HOST } /{ ALLOWED_BUCKET } are allowed" )
80111
81- # Fetch the HTML
82- async with httpx .AsyncClient (timeout = 30 .0 ) as client :
112+ # Fetch the HTML with shorter timeout
113+ async with httpx .AsyncClient (timeout = 10 .0 ) as client :
83114 try :
84115 response = await client .get (url )
85116 response .raise_for_status ()
0 commit comments