Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
health_router,
libraries_router,
plots_router,
proxy_router,
seo_router,
specs_router,
stats_router,
Expand Down Expand Up @@ -128,6 +129,7 @@ async def add_cache_headers(request: Request, call_next):
app.include_router(plots_router)
app.include_router(download_router)
app.include_router(seo_router)
app.include_router(proxy_router)


if __name__ == "__main__":
Expand Down
2 changes: 2 additions & 0 deletions api/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from api.routers.health import router as health_router
from api.routers.libraries import router as libraries_router
from api.routers.plots import router as plots_router
from api.routers.proxy import router as proxy_router
from api.routers.seo import router as seo_router
from api.routers.specs import router as specs_router
from api.routers.stats import router as stats_router
Expand All @@ -14,6 +15,7 @@
"health_router",
"libraries_router",
"plots_router",
"proxy_router",
"seo_router",
"specs_router",
"stats_router",
Expand Down
151 changes: 151 additions & 0 deletions api/routers/proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""HTML proxy endpoint for interactive plots with size reporting."""

from urllib.parse import urlparse

import httpx
from fastapi import APIRouter, HTTPException
from fastapi.responses import HTMLResponse


# Shared router instance for the proxy endpoints; tagged for OpenAPI grouping.
router = APIRouter(tags=["proxy"])

# Script injected to report content size to parent window.
# Uses a specific origin (pyplots.ai) for postMessage security, so only the
# production frontend can receive the size messages.
SIZE_REPORTER_SCRIPT = """
<script>
(function() {
    function reportSize() {
        try {
            // Find the main content element (try common patterns for different libraries)
            var content = document.querySelector(
                '.bk-root, .vega-embed, .plotly, .chart-container, #container, .lp-plot, svg, canvas'
            ) || document.body.firstElementChild || document.body;

            // Get actual rendered size
            var rect = content.getBoundingClientRect();
            var width = Math.max(rect.width, content.scrollWidth || 0, document.body.scrollWidth || 0);
            var height = Math.max(rect.height, content.scrollHeight || 0, document.body.scrollHeight || 0);

            // Add padding to account for action buttons, toolbars, and other UI elements
            var padding = 40;
            width += padding;
            height += padding;

            // Send to parent with specific origin for security.
            // NOTE(review): the hardcoded production origin means size reporting
            // will not reach a parent served from a dev/staging origin — confirm
            // whether a dynamic origin is needed outside production.
            if (width > 0 && height > 0 && window.parent !== window) {
                window.parent.postMessage({
                    type: 'pyplots-size',
                    width: Math.ceil(width),
                    height: Math.ceil(height)
                }, 'https://pyplots.ai');
            }
        } catch (e) {
            // Silently fail if postMessage is blocked
        }
    }

    // Report after load and after delays (for async rendering libraries)
    if (document.readyState === 'complete') {
        setTimeout(reportSize, 100);
        setTimeout(reportSize, 500);
        setTimeout(reportSize, 1000);
    } else {
        window.addEventListener('load', function() {
            setTimeout(reportSize, 100);
            setTimeout(reportSize, 500);
            setTimeout(reportSize, 1000);
        });
    }
})();
</script>
"""

# Allowed GCS bucket for security
ALLOWED_HOST = "storage.googleapis.com"
ALLOWED_BUCKET = "pyplots-images"


def build_safe_gcs_url(url: str) -> str | None:
"""
Validate URL and return a reconstructed safe GCS URL.

This prevents SSRF by constructing the URL from hardcoded values
instead of passing user input directly.

Args:
url: User-provided URL to validate

Returns:
Reconstructed safe URL or None if validation fails
"""
try:
parsed = urlparse(url)
# Must be HTTPS
if parsed.scheme != "https":
return None
# Must be exact host (no subdomains)
if parsed.netloc != ALLOWED_HOST:
return None
# Path must start with bucket name
path_parts = parsed.path.strip("/").split("/")
if len(path_parts) < 2:
return None
if path_parts[0] != ALLOWED_BUCKET:
return None
# Check for path traversal attempts
if ".." in parsed.path:
return None
# Validate path contains only safe characters (alphanumeric, hyphens, underscores, dots, slashes)
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The safe character validation allows the "+" character but this may not be intended for GCS URLs. The comment says "alphanumeric, hyphens, underscores, dots, slashes" but the actual check includes "+". This could be a security issue if "+" has special meaning in URL contexts that could be exploited.

Either remove "+" from the allowed characters or update the comment to document why it's included.

Suggested change
# Validate path contains only safe characters (alphanumeric, hyphens, underscores, dots, slashes)
# Validate path contains only safe characters (alphanumeric, hyphens, underscores, dots, slashes, plus)

Copilot uses AI. Check for mistakes.
safe_path = parsed.path.strip("/")
if not all(c.isalnum() or c in "-_./+" for c in safe_path):
return None
# Reconstruct URL from hardcoded values to prevent SSRF
# This breaks the taint flow by not using the original URL
return f"https://{ALLOWED_HOST}/{safe_path}"
except Exception:
return None


@router.get("/proxy/html", response_class=HTMLResponse)
async def proxy_html(url: str):
    """
    Proxy an HTML file and inject the size reporting script.

    Fetches HTML from GCS, injects a script that reports the content's
    actual dimensions to the parent window via postMessage, and returns
    the modified document so the frontend can scale the iframe to fit.

    Args:
        url: The GCS URL to fetch (must point into the allowed bucket)

    Returns:
        The fetched HTML with the size reporting script injected
    """
    # Security: only a validated, reconstructed GCS URL is ever fetched (SSRF guard).
    safe_url = build_safe_gcs_url(url)
    if safe_url is None:
        raise HTTPException(status_code=400, detail=f"Only URLs from {ALLOWED_HOST}/{ALLOWED_BUCKET} are allowed")

    # Fetch the document from storage with a short timeout.
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            upstream = await client.get(safe_url)
            upstream.raise_for_status()
        except httpx.HTTPStatusError as exc:
            raise HTTPException(status_code=exc.response.status_code, detail="Failed to fetch HTML") from exc
        except httpx.RequestError as exc:
            raise HTTPException(status_code=502, detail="Failed to connect to storage") from exc

    document = upstream.text

    # Inject the reporter just before the first closing tag we recognize;
    # when neither tag exists, append the script to the end of the document.
    for closing_tag in ("</body>", "</html>"):
        if closing_tag in document:
            document = document.replace(closing_tag, f"{SIZE_REPORTER_SCRIPT}{closing_tag}")
            break
    else:
        document += SIZE_REPORTER_SCRIPT

    return HTMLResponse(content=document)
11 changes: 3 additions & 8 deletions api/routers/seo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from api.cache import cache_key, get_cache, set_cache
from api.dependencies import optional_db
from core.constants import LIBRARIES_METADATA
from core.database import SpecRepository


Expand All @@ -20,7 +19,7 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
"""
Generate dynamic XML sitemap for SEO.

Includes all specs with implementations and all libraries.
Includes root, catalog page, and all specs with implementations.
"""
key = cache_key("sitemap_xml")
cached = get_cache(key)
Expand All @@ -32,6 +31,7 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
'<?xml version="1.0" encoding="UTF-8"?>',
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
" <url><loc>https://pyplots.ai/</loc></url>",
" <url><loc>https://pyplots.ai/catalog</loc></url>",
]

# Add spec URLs (only specs with implementations)
Expand All @@ -41,12 +41,7 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
for spec in specs:
if spec.impls: # Only include specs with implementations
spec_id = html.escape(spec.id)
xml_lines.append(f" <url><loc>https://pyplots.ai/?spec={spec_id}</loc></url>")

# Add library URLs (static list)
for lib in LIBRARIES_METADATA:
lib_id = html.escape(lib["id"])
xml_lines.append(f" <url><loc>https://pyplots.ai/?lib={lib_id}</loc></url>")
xml_lines.append(f" <url><loc>https://pyplots.ai/{spec_id}</loc></url>")

xml_lines.append("</urlset>")
xml = "\n".join(xml_lines)
Expand Down
7 changes: 7 additions & 0 deletions api/routers/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ async def get_spec(spec_id: str, db: AsyncSession = Depends(require_db)):
generated_by=impl.generated_by,
python_version=impl.python_version,
library_version=impl.library_version,
review_strengths=impl.review_strengths or [],
review_weaknesses=impl.review_weaknesses or [],
review_image_description=impl.review_image_description,
review_criteria_checklist=impl.review_criteria_checklist,
review_verdict=impl.review_verdict,
)
for impl in spec.impls
]
Expand All @@ -95,6 +100,8 @@ async def get_spec(spec_id: str, db: AsyncSession = Depends(require_db)):
tags=spec.tags,
issue=spec.issue,
suggested=spec.suggested,
created=spec.created.isoformat() if spec.created else None,
updated=spec.updated.isoformat() if spec.updated else None,
implementations=impls,
)
set_cache(key, result)
Expand Down
8 changes: 8 additions & 0 deletions api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ class ImplementationResponse(BaseModel):
generated_by: Optional[str] = None
python_version: Optional[str] = None
library_version: Optional[str] = None
# Review fields
review_strengths: list[str] = []
review_weaknesses: list[str] = []
review_image_description: Optional[str] = None
review_criteria_checklist: Optional[dict] = None
review_verdict: Optional[str] = None


class SpecDetailResponse(BaseModel):
Expand All @@ -37,6 +43,8 @@ class SpecDetailResponse(BaseModel):
tags: Optional[dict] = None
issue: Optional[int] = None
suggested: Optional[str] = None
created: Optional[str] = None
updated: Optional[str] = None
implementations: list[ImplementationResponse] = []


Expand Down
2 changes: 2 additions & 0 deletions app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
"@mui/material": "^7.3.6",
"react": "^19.2.3",
"react-dom": "^19.2.3",
"react-helmet-async": "^2.0.5",
"react-router-dom": "^7.11.0",
"react-syntax-highlighter": "^16.1.0"
},
"devDependencies": {
Expand Down
Loading
Loading