Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
health_router,
libraries_router,
plots_router,
proxy_router,
seo_router,
specs_router,
stats_router,
Expand Down Expand Up @@ -128,6 +129,7 @@ async def add_cache_headers(request: Request, call_next):
app.include_router(plots_router)
app.include_router(download_router)
app.include_router(seo_router)
app.include_router(proxy_router)


if __name__ == "__main__":
Expand Down
2 changes: 2 additions & 0 deletions api/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from api.routers.health import router as health_router
from api.routers.libraries import router as libraries_router
from api.routers.plots import router as plots_router
from api.routers.proxy import router as proxy_router
from api.routers.seo import router as seo_router
from api.routers.specs import router as specs_router
from api.routers.stats import router as stats_router
Expand All @@ -14,6 +15,7 @@
"health_router",
"libraries_router",
"plots_router",
"proxy_router",
"seo_router",
"specs_router",
"stats_router",
Expand Down
102 changes: 102 additions & 0 deletions api/routers/proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""HTML proxy endpoint for interactive plots with size reporting."""

import httpx
from fastapi import APIRouter, HTTPException
from fastapi.responses import HTMLResponse


router = APIRouter(tags=["proxy"])

# Script injected to report content size to parent window.
# Security note: the postMessage target origin is derived from
# document.referrer (the embedding page) rather than the wildcard '*',
# so size data is only delivered to the page that actually embeds this
# iframe and cannot leak to an arbitrary embedder.
SIZE_REPORTER_SCRIPT = """
<script>
(function() {
    function reportSize() {
        // Find the main content element (try common patterns for different libraries)
        var content = document.querySelector(
            '.bk-root, .vega-embed, .plotly, .chart-container, #container, .lp-plot, svg, canvas'
        ) || document.body.firstElementChild || document.body;

        // Get actual rendered size
        var rect = content.getBoundingClientRect();
        var width = Math.max(rect.width, content.scrollWidth || 0, document.body.scrollWidth || 0);
        var height = Math.max(rect.height, content.scrollHeight || 0, document.body.scrollHeight || 0);

        // Add padding to account for action buttons, toolbars, and other UI elements
        var padding = 40;
        width += padding;
        height += padding;

        // Determine the postMessage target origin: prefer the embedding
        // page's origin (from document.referrer); fall back to our own origin.
        var targetOrigin = window.location.origin;
        if (document.referrer) {
            try {
                targetOrigin = new URL(document.referrer).origin;
            } catch (e) {
                // Unparseable referrer: keep the safer default.
            }
        }

        // Send to parent; swallow failures so a blocked postMessage
        // never breaks the embedded plot itself.
        if (width > 0 && height > 0) {
            try {
                window.parent.postMessage({
                    type: 'pyplots-size',
                    width: Math.ceil(width),
                    height: Math.ceil(height)
                }, targetOrigin);
            } catch (e) {
                // Graceful degradation: the iframe simply keeps its default size.
            }
        }
    }

    // Report after load and after delays (for async rendering libraries)
    if (document.readyState === 'complete') {
        setTimeout(reportSize, 100);
        setTimeout(reportSize, 500);
        setTimeout(reportSize, 1000);
    } else {
        window.addEventListener('load', function() {
            setTimeout(reportSize, 100);
            setTimeout(reportSize, 500);
            setTimeout(reportSize, 1000);
        });
    }
})();
</script>
"""

# Allowed GCS host and bucket for security: the proxy fetches only from here.
ALLOWED_HOST = "storage.googleapis.com"
ALLOWED_BUCKET = "pyplots-images"


@router.get("/proxy/html", response_class=HTMLResponse)
async def proxy_html(url: str) -> HTMLResponse:
    """
    Proxy an HTML file and inject size reporting script.

    This endpoint fetches HTML from GCS, injects a script that reports
    the content's actual dimensions via postMessage, and returns the
    modified HTML. This allows the frontend to dynamically scale the
    iframe based on actual content size.

    Args:
        url: The GCS URL to fetch (must be an https URL inside the
            allowed bucket on the allowed host).

    Returns:
        Modified HTML with size reporting script injected.

    Raises:
        HTTPException: 400 if the URL is outside the allowed bucket,
            the upstream status code if GCS returns an error, or 502
            if the storage backend cannot be reached.
    """
    # Local import keeps the module's dependency surface unchanged.
    from urllib.parse import unquote, urlparse

    # Security: strictly validate scheme, host, and bucket path.
    # A plain startswith() prefix check would accept path-traversal URLs
    # such as https://storage.googleapis.com/pyplots-images/../other/x.html,
    # so we parse the URL, percent-decode the path, and reject any '..'
    # segments before allowing the fetch.
    parsed = urlparse(url)
    path = unquote(parsed.path)
    if (
        parsed.scheme != "https"
        or parsed.netloc != ALLOWED_HOST
        or not path.startswith(f"/{ALLOWED_BUCKET}/")
        or ".." in path
    ):
        raise HTTPException(status_code=400, detail=f"Only URLs from {ALLOWED_HOST}/{ALLOWED_BUCKET} are allowed")

    # Fetch the HTML from GCS.
    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            response = await client.get(url)
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            # Mirror the upstream status so the client sees 404/403/etc.
            raise HTTPException(status_code=e.response.status_code, detail="Failed to fetch HTML") from e
        except httpx.RequestError as e:
            # Connection/timeout problems map to 502 Bad Gateway.
            raise HTTPException(status_code=502, detail="Failed to connect to storage") from e

    html_content = response.text

    # Inject the size reporter script just before </body> (preferred),
    # falling back to </html>, and finally to plain concatenation so the
    # script is always delivered.
    if "</body>" in html_content:
        html_content = html_content.replace("</body>", f"{SIZE_REPORTER_SCRIPT}</body>")
    elif "</html>" in html_content:
        html_content = html_content.replace("</html>", f"{SIZE_REPORTER_SCRIPT}</html>")
    else:
        # Fallback: append to end
        html_content += SIZE_REPORTER_SCRIPT

    return HTMLResponse(content=html_content)
11 changes: 3 additions & 8 deletions api/routers/seo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from api.cache import cache_key, get_cache, set_cache
from api.dependencies import optional_db
from core.constants import LIBRARIES_METADATA
from core.database import SpecRepository


Expand All @@ -20,7 +19,7 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
"""
Generate dynamic XML sitemap for SEO.

Includes all specs with implementations and all libraries.
Includes root, catalog page, and all specs with implementations.
"""
key = cache_key("sitemap_xml")
cached = get_cache(key)
Expand All @@ -32,6 +31,7 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
'<?xml version="1.0" encoding="UTF-8"?>',
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
" <url><loc>https://pyplots.ai/</loc></url>",
" <url><loc>https://pyplots.ai/catalog</loc></url>",
]

# Add spec URLs (only specs with implementations)
Expand All @@ -41,12 +41,7 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
for spec in specs:
if spec.impls: # Only include specs with implementations
spec_id = html.escape(spec.id)
xml_lines.append(f" <url><loc>https://pyplots.ai/?spec={spec_id}</loc></url>")

# Add library URLs (static list)
for lib in LIBRARIES_METADATA:
lib_id = html.escape(lib["id"])
xml_lines.append(f" <url><loc>https://pyplots.ai/?lib={lib_id}</loc></url>")
xml_lines.append(f" <url><loc>https://pyplots.ai/{spec_id}</loc></url>")

xml_lines.append("</urlset>")
xml = "\n".join(xml_lines)
Expand Down
7 changes: 7 additions & 0 deletions api/routers/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ async def get_spec(spec_id: str, db: AsyncSession = Depends(require_db)):
generated_by=impl.generated_by,
python_version=impl.python_version,
library_version=impl.library_version,
review_strengths=impl.review_strengths or [],
review_weaknesses=impl.review_weaknesses or [],
review_image_description=impl.review_image_description,
review_criteria_checklist=impl.review_criteria_checklist,
review_verdict=impl.review_verdict,
)
for impl in spec.impls
]
Expand All @@ -95,6 +100,8 @@ async def get_spec(spec_id: str, db: AsyncSession = Depends(require_db)):
tags=spec.tags,
issue=spec.issue,
suggested=spec.suggested,
created=spec.created.isoformat() if spec.created else None,
updated=spec.updated.isoformat() if spec.updated else None,
implementations=impls,
)
set_cache(key, result)
Expand Down
8 changes: 8 additions & 0 deletions api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ class ImplementationResponse(BaseModel):
generated_by: Optional[str] = None
python_version: Optional[str] = None
library_version: Optional[str] = None
# Review fields
review_strengths: list[str] = []
review_weaknesses: list[str] = []
review_image_description: Optional[str] = None
review_criteria_checklist: Optional[dict] = None
review_verdict: Optional[str] = None


class SpecDetailResponse(BaseModel):
Expand All @@ -37,6 +43,8 @@ class SpecDetailResponse(BaseModel):
tags: Optional[dict] = None
issue: Optional[int] = None
suggested: Optional[str] = None
created: Optional[str] = None
updated: Optional[str] = None
implementations: list[ImplementationResponse] = []


Expand Down
2 changes: 2 additions & 0 deletions app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
"@mui/material": "^7.3.6",
"react": "^19.2.3",
"react-dom": "^19.2.3",
"react-helmet-async": "^2.0.5",
"react-router-dom": "^7.11.0",
"react-syntax-highlighter": "^16.1.0"
},
"devDependencies": {
Expand Down
Loading
Loading