Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 123 additions & 3 deletions api/routers/seo.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""SEO endpoints (sitemap)."""
"""SEO endpoints (sitemap, bot-optimized pages)."""

import html

from fastapi import APIRouter, Depends
from fastapi.responses import Response
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import HTMLResponse, Response
from sqlalchemy.ext.asyncio import AsyncSession

from api.cache import cache_key, get_cache, set_cache
Expand All @@ -14,6 +14,34 @@
router = APIRouter(tags=["seo"])


# Minimal HTML template for social media bots (meta tags are what matters).
# Rendered with str.format(); the placeholders are {title}, {description},
# {image} and {url}. Each value is inserted verbatim into element text and into
# double-quoted attribute values, so anything that is not a trusted constant
# must be HTML-escaped by the caller before formatting.
BOT_HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>{title}</title>
<meta name="description" content="{description}" />
<meta property="og:title" content="{title}" />
<meta property="og:description" content="{description}" />
<meta property="og:image" content="{image}" />
<meta property="og:url" content="{url}" />
<meta property="og:type" content="website" />
<meta property="og:site_name" content="pyplots.ai" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="{title}" />
<meta name="twitter:description" content="{description}" />
<meta name="twitter:image" content="{image}" />
<link rel="canonical" href="{url}" />
</head>
<body><h1>{title}</h1><p>{description}</p></body>
</html>"""

# Site-wide preview image, used for og:image / twitter:image when a page has no
# image of its own.
DEFAULT_IMAGE = "https://pyplots.ai/og-image.png"
# Site-wide description, used when a page has no description of its own.
DEFAULT_DESCRIPTION = (
"Library-agnostic, AI-powered Python plotting examples. Automatically generated, tested, and maintained."
)


@router.get("/sitemap.xml")
async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
"""
Expand Down Expand Up @@ -53,3 +81,95 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):

set_cache(key, xml)
return Response(content=xml, media_type="application/xml")


# =============================================================================
# Bot SEO Proxy Endpoints
# These endpoints serve HTML with correct meta tags for social media bots.
# nginx proxies bot requests here based on User-Agent detection.
# =============================================================================


@router.get("/seo-proxy/")
async def seo_home():
    """Bot-optimized home page with correct og:tags."""
    # Every value on the landing page is a static, trusted constant, so the
    # template can be filled in directly without escaping or a DB lookup.
    page = BOT_HTML_TEMPLATE.format(
        url="https://pyplots.ai/",
        image=DEFAULT_IMAGE,
        description=DEFAULT_DESCRIPTION,
        title="pyplots.ai",
    )
    return HTMLResponse(page)


@router.get("/seo-proxy/catalog")
async def seo_catalog():
    """Bot-optimized catalog page with correct og:tags."""
    # The catalog page has fixed metadata; only the title, description and
    # canonical URL differ from the home page.
    catalog_description = (
        "Browse all Python plotting specifications alphabetically. "
        "Find matplotlib, seaborn, plotly, bokeh, altair examples."
    )
    page = BOT_HTML_TEMPLATE.format(
        url="https://pyplots.ai/catalog",
        image=DEFAULT_IMAGE,
        description=catalog_description,
        title="Catalog | pyplots.ai",
    )
    return HTMLResponse(page)


@router.get("/seo-proxy/{spec_id}")
async def seo_spec_overview(spec_id: str, db: AsyncSession | None = Depends(optional_db)):
"""Bot-optimized spec overview page with correct og:tags."""
if db is None:
# Fallback when DB unavailable
return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{spec_id} | pyplots.ai",
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The spec_id in the title is not HTML-escaped, which could lead to XSS if a malicious spec_id is provided. All user-controlled inputs should be escaped before insertion into HTML. The url parameter already escapes it correctly, but the title needs the same treatment.

Suggested change
title=f"{spec_id} | pyplots.ai",
title=f"{html.escape(spec_id)} | pyplots.ai",

Copilot uses AI. Check for mistakes.
description=DEFAULT_DESCRIPTION,
image=DEFAULT_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}",
)
Comment on lines +122 to +127

Check warning

Code scanning / CodeQL

Reflected server-side cross-site scripting Medium

Cross-site scripting vulnerability due to a user-provided value.

Copilot Autofix

AI 4 months ago

In general, to fix reflected server-side XSS, every user-controlled value inserted into an HTML document must be properly escaped for the context in which it appears (HTML body, attribute, URL, etc.). In this file, all uses of spec_id in HTML contexts should be consistently passed through html.escape, just as is already done for url in this same branch and for title/description when the DB is available.

The single best minimal fix is to escape spec_id when it is interpolated into the title for the DB-unavailable fallback in seo_spec_overview. Specifically, change line 124 from title=f"{spec_id} | pyplots.ai", to title=f"{html.escape(spec_id)} | pyplots.ai",. This mirrors the escaping already used for the url field in the same response and for the title field later in the function when spec is loaded from the database. No new imports are needed because html is already imported at the top of api/routers/seo.py. No other behavioral changes are introduced; only the unsafe direct inclusion of the raw path parameter into HTML is corrected.

Suggested changeset 1
api/routers/seo.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/routers/seo.py b/api/routers/seo.py
--- a/api/routers/seo.py
+++ b/api/routers/seo.py
@@ -121,7 +121,7 @@
         # Fallback when DB unavailable
         return HTMLResponse(
             BOT_HTML_TEMPLATE.format(
-                title=f"{spec_id} | pyplots.ai",
+                title=f"{html.escape(spec_id)} | pyplots.ai",
                 description=DEFAULT_DESCRIPTION,
                 image=DEFAULT_IMAGE,
                 url=f"https://pyplots.ai/{html.escape(spec_id)}",
EOF
@@ -121,7 +121,7 @@
# Fallback when DB unavailable
return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{spec_id} | pyplots.ai",
title=f"{html.escape(spec_id)} | pyplots.ai",
description=DEFAULT_DESCRIPTION,
image=DEFAULT_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}",
Copilot is powered by AI and may make mistakes. Always verify output.
Unable to commit as this autofix suggestion is now outdated
)

repo = SpecRepository(db)
spec = await repo.get_by_id(spec_id)
if not spec:
raise HTTPException(status_code=404, detail="Spec not found")

return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{html.escape(spec.title)} | pyplots.ai",
description=html.escape(spec.description or DEFAULT_DESCRIPTION),
image=DEFAULT_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}",
)
)


@router.get("/seo-proxy/{spec_id}/{library}")
async def seo_spec_implementation(spec_id: str, library: str, db: AsyncSession | None = Depends(optional_db)):
"""Bot-optimized spec implementation page with dynamic og:image from preview_url."""
if db is None:
# Fallback when DB unavailable
return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{html.escape(spec_id)} - {html.escape(library)} | pyplots.ai",
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The library parameter in the title is not HTML-escaped, which could lead to XSS if a malicious library value is provided. All user-controlled inputs should be escaped before insertion into HTML.

Copilot uses AI. Check for mistakes.
description=DEFAULT_DESCRIPTION,
image=DEFAULT_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}/{html.escape(library)}",
)
Comment on lines +151 to +156

Check warning

Code scanning / CodeQL

Reflected server-side cross-site scripting Medium

Cross-site scripting vulnerability due to a user-provided value.
Cross-site scripting vulnerability due to a user-provided value.

Copilot Autofix

AI 4 months ago

In general, to fix reflected server-side XSS in this endpoint, all user-controlled values (spec_id, library) must be HTML-escaped before being interpolated into BOT_HTML_TEMPLATE, not only when used in URLs but also when used in text nodes like the <title> element. The Python standard library’s html.escape() is already imported and used for some fields; we should extend its use to every occurrence where raw user input is inserted into the template.

Concretely, in seo_spec_implementation’s DB-unavailable fallback (lines ~155–161), the title field currently embeds spec_id and library without escaping. We should wrap these in html.escape(), as is already done for the url field. This preserves existing functionality (the same values are displayed) but ensures any <, >, &, quotes, etc. are encoded and cannot break out of the HTML context. No new imports or helpers are needed; we only adjust the f-string expressions in that block. The rest of the function already escapes user-derived values where necessary.

Suggested changeset 1
api/routers/seo.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/routers/seo.py b/api/routers/seo.py
--- a/api/routers/seo.py
+++ b/api/routers/seo.py
@@ -154,7 +154,7 @@
         # Fallback when DB unavailable
         return HTMLResponse(
             BOT_HTML_TEMPLATE.format(
-                title=f"{spec_id} - {library} | pyplots.ai",
+                title=f"{html.escape(spec_id)} - {html.escape(library)} | pyplots.ai",
                 description=DEFAULT_DESCRIPTION,
                 image=DEFAULT_IMAGE,
                 url=f"https://pyplots.ai/{html.escape(spec_id)}/{html.escape(library)}",
EOF
@@ -154,7 +154,7 @@
# Fallback when DB unavailable
return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{spec_id} - {library} | pyplots.ai",
title=f"{html.escape(spec_id)} - {html.escape(library)} | pyplots.ai",
description=DEFAULT_DESCRIPTION,
image=DEFAULT_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}/{html.escape(library)}",
Copilot is powered by AI and may make mistakes. Always verify output.
Unable to commit as this autofix suggestion is now outdated
)

repo = SpecRepository(db)
spec = await repo.get_by_id(spec_id)
if not spec:
raise HTTPException(status_code=404, detail="Spec not found")

# Find the implementation for this library
impl = next((i for i in spec.impls if i.library_id == library), None)
image = impl.preview_url if impl and impl.preview_url else DEFAULT_IMAGE

return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{html.escape(spec.title)} - {html.escape(library)} | pyplots.ai",
description=html.escape(spec.description or DEFAULT_DESCRIPTION),
image=html.escape(image, quote=True),
url=f"https://pyplots.ai/{html.escape(spec_id)}/{html.escape(library)}",
)
)
31 changes: 31 additions & 0 deletions app/nginx.conf
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# Bot detection for SEO - social media crawlers need pre-rendered meta tags.
# Sets $is_bot to 1 when the User-Agent header matches one of the patterns
# below ("~*" is a case-insensitive regex match) and to 0 otherwise.
# The list also includes search-engine crawlers (googlebot, bingbot, applebot).
map $http_user_agent $is_bot {
default 0;
~*twitterbot 1;
~*facebookexternalhit 1;
~*linkedinbot 1;
~*slackbot 1;
~*telegrambot 1;
~*whatsapp 1;
~*googlebot 1;
~*bingbot 1;
~*discordbot 1;
~*pinterestbot 1;
~*applebot 1;
}

server {
listen 8080;
server_name _;
Expand Down Expand Up @@ -25,8 +41,23 @@ server {
add_header Expires "0";
}

# Named location for bot SEO proxy.
# Forwards the original request URI (path and query string) to the API under
# the /seo-proxy prefix.
# NOTE(review): proxy_pass contains a variable, which normally makes nginx
# resolve the upstream hostname at request time - confirm a `resolver`
# directive is configured elsewhere in this file.
location @seo_proxy {
proxy_pass https://api.pyplots.ai/seo-proxy$request_uri;
# Present the API hostname both as the Host header and as the TLS SNI name.
proxy_set_header Host api.pyplots.ai;
proxy_ssl_server_name on;
# Verify the upstream certificate against the system CA bundle.
proxy_ssl_verify on;
proxy_ssl_trusted_certificate /etc/ssl/certs/ca-certificates.crt;
}

# SPA routing - serve index.html for all routes.
# Bot requests are handed off internally to @seo_proxy (no HTTP redirect is
# sent to the client) so crawlers receive proper meta tags.
location / {
# error_page trick (nginx-safe pattern): the only directive inside `if` is
# `return`; status 418 is used purely as an internal signal that error_page
# maps to the named location, with "=" passing through the proxied response code.
error_page 418 = @seo_proxy;
if ($is_bot) {
return 418;
}
# Everyone else gets the static file if it exists, else the SPA entry point.
try_files $uri $uri/ /index.html;
}

Expand Down
Binary file modified app/public/og-image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
116 changes: 116 additions & 0 deletions tests/unit/api/test_routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,122 @@ def test_sitemap_with_db(self, db_client, mock_spec) -> None:
assert "https://pyplots.ai/scatter-basic/matplotlib</loc>" in response.text


class TestSeoProxyRouter:
    """Tests for SEO proxy endpoints (bot-optimized pages).

    Covers the static pages (home, catalog) and the spec / implementation
    pages in three situations: database unavailable (fallback HTML), spec
    found, and spec missing (404).
    """

    def test_seo_home(self, client: TestClient) -> None:
        """SEO home page should return HTML with og:tags."""
        response = client.get("/seo-proxy/")
        assert response.status_code == 200
        assert "text/html" in response.headers["content-type"]
        assert "og:title" in response.text
        assert "pyplots.ai" in response.text
        assert "og:image" in response.text
        assert "twitter:card" in response.text

    def test_seo_catalog(self, client: TestClient) -> None:
        """SEO catalog page should return HTML with og:tags."""
        response = client.get("/seo-proxy/catalog")
        assert response.status_code == 200
        assert "text/html" in response.headers["content-type"]
        assert "Catalog" in response.text
        assert "og:title" in response.text
        assert "https://pyplots.ai/catalog" in response.text

    def test_seo_spec_overview_without_db(self, client: TestClient) -> None:
        """SEO spec overview should return fallback HTML when DB unavailable."""
        with patch(DB_CONFIG_PATCH, return_value=False):
            response = client.get("/seo-proxy/scatter-basic")
            assert response.status_code == 200
            assert "og:title" in response.text
            assert "scatter-basic" in response.text
            assert "og-image.png" in response.text  # Default image

    def test_seo_spec_overview_with_db(self, db_client, mock_spec) -> None:
        """SEO spec overview should return HTML with spec title from DB."""
        client, _ = db_client

        mock_spec_repo = MagicMock()
        mock_spec_repo.get_by_id = AsyncMock(return_value=mock_spec)

        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
            response = client.get("/seo-proxy/scatter-basic")
            assert response.status_code == 200
            assert "Basic Scatter Plot" in response.text
            assert "og:title" in response.text
            assert "https://pyplots.ai/scatter-basic" in response.text

    def test_seo_spec_overview_not_found(self, db_client) -> None:
        """SEO spec overview should return 404 when spec not found."""
        client, _ = db_client

        mock_spec_repo = MagicMock()
        mock_spec_repo.get_by_id = AsyncMock(return_value=None)

        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
            response = client.get("/seo-proxy/nonexistent-spec")
            assert response.status_code == 404

    def test_seo_spec_implementation_without_db(self, client: TestClient) -> None:
        """SEO spec implementation should return fallback HTML when DB unavailable."""
        with patch(DB_CONFIG_PATCH, return_value=False):
            response = client.get("/seo-proxy/scatter-basic/matplotlib")
            assert response.status_code == 200
            assert "og:title" in response.text
            assert "scatter-basic" in response.text
            assert "matplotlib" in response.text
            assert "og-image.png" in response.text  # Default image

    def test_seo_spec_implementation_with_preview_url(self, db_client, mock_spec) -> None:
        """SEO spec implementation should use preview_url from implementation."""
        client, _ = db_client

        mock_spec_repo = MagicMock()
        mock_spec_repo.get_by_id = AsyncMock(return_value=mock_spec)

        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
            response = client.get("/seo-proxy/scatter-basic/matplotlib")
            assert response.status_code == 200
            assert "Basic Scatter Plot" in response.text
            assert "matplotlib" in response.text
            # The page must carry the implementation's own preview URL. The
            # previous `... or "og:image" in response.text` alternative was
            # always true (the template always contains og:image), so the
            # test could never fail.
            # NOTE(review): assumes the mock_spec fixture's matplotlib impl has
            # preview_url == TEST_IMAGE_URL and that the URL contains no
            # HTML-special characters (the endpoint HTML-escapes it) - confirm.
            assert TEST_IMAGE_URL in response.text

    def test_seo_spec_implementation_not_found(self, db_client) -> None:
        """SEO spec implementation should return 404 when spec not found."""
        client, _ = db_client

        mock_spec_repo = MagicMock()
        mock_spec_repo.get_by_id = AsyncMock(return_value=None)

        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
            response = client.get("/seo-proxy/nonexistent-spec/matplotlib")
            assert response.status_code == 404

    def test_seo_spec_implementation_fallback_image(self, db_client, mock_spec) -> None:
        """SEO spec implementation should use default image when impl has no preview."""
        client, _ = db_client

        # Create a spec with implementation that has no preview_url
        mock_impl_no_preview = MagicMock()
        mock_impl_no_preview.library_id = "seaborn"
        mock_impl_no_preview.preview_url = None

        mock_spec_no_preview = MagicMock()
        mock_spec_no_preview.id = "scatter-basic"
        mock_spec_no_preview.title = "Basic Scatter Plot"
        mock_spec_no_preview.description = "A basic scatter plot"
        mock_spec_no_preview.impls = [mock_impl_no_preview]

        mock_spec_repo = MagicMock()
        mock_spec_repo.get_by_id = AsyncMock(return_value=mock_spec_no_preview)

        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
            response = client.get("/seo-proxy/scatter-basic/seaborn")
            assert response.status_code == 200
            assert "og-image.png" in response.text  # Default image used


class TestPlotsRouter:
"""Tests for plots filter router."""

Expand Down
Loading