diff --git a/api/routers/seo.py b/api/routers/seo.py index 9127fcd97e..8666072fa6 100644 --- a/api/routers/seo.py +++ b/api/routers/seo.py @@ -1,9 +1,9 @@ -"""SEO endpoints (sitemap).""" +"""SEO endpoints (sitemap, bot-optimized pages).""" import html -from fastapi import APIRouter, Depends -from fastapi.responses import Response +from fastapi import APIRouter, Depends, HTTPException +from fastapi.responses import HTMLResponse, Response from sqlalchemy.ext.asyncio import AsyncSession from api.cache import cache_key, get_cache, set_cache @@ -14,6 +14,34 @@ router = APIRouter(tags=["seo"]) +# Minimal HTML template for social media bots (meta tags are what matters) +BOT_HTML_TEMPLATE = """ + + + + {title} + + + + + + + + + + + + + +

{title}

{description}

+"""
+
+DEFAULT_IMAGE = "https://pyplots.ai/og-image.png"
+DEFAULT_DESCRIPTION = (
+    "Library-agnostic, AI-powered Python plotting examples. Automatically generated, tested, and maintained."
+)
+
+
 @router.get("/sitemap.xml")
 async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
     """
@@ -53,3 +81,122 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
     set_cache(key, xml)
 
     return Response(content=xml, media_type="application/xml")
+
+
+# =============================================================================
+# Bot SEO Proxy Endpoints
+# These endpoints serve HTML with correct meta tags for social media bots.
+# nginx proxies bot requests here based on User-Agent detection.
+# =============================================================================
+
+
+@router.get("/seo-proxy/")
+async def seo_home():
+    """Bot-optimized home page with correct og:tags."""
+    return HTMLResponse(
+        BOT_HTML_TEMPLATE.format(
+            title="pyplots.ai", description=DEFAULT_DESCRIPTION, image=DEFAULT_IMAGE, url="https://pyplots.ai/"
+        )
+    )
+
+
+@router.get("/seo-proxy/catalog")
+async def seo_catalog():
+    """Bot-optimized catalog page with correct og:tags."""
+    return HTMLResponse(
+        BOT_HTML_TEMPLATE.format(
+            title="Catalog | pyplots.ai",
+            description="Browse all Python plotting specifications alphabetically. Find matplotlib, seaborn, plotly, bokeh, altair examples.",
+            image=DEFAULT_IMAGE,
+            url="https://pyplots.ai/catalog",
+        )
+    )
+
+
+@router.get("/seo-proxy/{spec_id}")
+async def seo_spec_overview(spec_id: str, db: AsyncSession | None = Depends(optional_db)):
+    """
+    Bot-optimized spec overview page with correct og:tags.
+
+    Falls back to a generic page built from the escaped ``spec_id`` when no
+    database session is available; otherwise the title and description come
+    from the stored spec.
+
+    Raises:
+        HTTPException: 404 when the database has no spec with this id.
+    """
+    # spec_id comes straight from the URL path, so escape it once and reuse it
+    # everywhere it is interpolated into the HTML template.
+    safe_spec_id = html.escape(spec_id)
+
+    if db is None:
+        # Fallback when DB unavailable
+        return HTMLResponse(
+            BOT_HTML_TEMPLATE.format(
+                title=f"{safe_spec_id} | pyplots.ai",
+                description=DEFAULT_DESCRIPTION,
+                image=DEFAULT_IMAGE,
+                url=f"https://pyplots.ai/{safe_spec_id}",
+            )
+        )
+
+    repo = SpecRepository(db)
+    spec = await repo.get_by_id(spec_id)
+    if not spec:
+        raise HTTPException(status_code=404, detail="Spec not found")
+
+    return HTMLResponse(
+        BOT_HTML_TEMPLATE.format(
+            title=f"{html.escape(spec.title)} | pyplots.ai",
+            description=html.escape(spec.description or DEFAULT_DESCRIPTION),
+            image=DEFAULT_IMAGE,
+            url=f"https://pyplots.ai/{safe_spec_id}",
+        )
+    )
+
+
+@router.get("/seo-proxy/{spec_id}/{library}")
+async def seo_spec_implementation(spec_id: str, library: str, db: AsyncSession | None = Depends(optional_db)):
+    """
+    Bot-optimized spec implementation page with dynamic og:image from preview_url.
+
+    The og:image is the matching implementation's ``preview_url``; the default
+    image is used when the library has no implementation or no preview, and
+    when no database session is available.
+
+    Raises:
+        HTTPException: 404 when the database has no spec with this id.
+    """
+    # Both path parameters are caller-controlled; escape once, reuse below.
+    safe_spec_id = html.escape(spec_id)
+    safe_library = html.escape(library)
+    page_url = f"https://pyplots.ai/{safe_spec_id}/{safe_library}"
+
+    if db is None:
+        # Fallback when DB unavailable
+        return HTMLResponse(
+            BOT_HTML_TEMPLATE.format(
+                title=f"{safe_spec_id} - {safe_library} | pyplots.ai",
+                description=DEFAULT_DESCRIPTION,
+                image=DEFAULT_IMAGE,
+                url=page_url,
+            )
+        )
+
+    repo = SpecRepository(db)
+    spec = await repo.get_by_id(spec_id)
+    if not spec:
+        raise HTTPException(status_code=404, detail="Spec not found")
+
+    # Find the implementation for this library
+    impl = next((i for i in spec.impls if i.library_id == library), None)
+    image = impl.preview_url if impl and impl.preview_url else DEFAULT_IMAGE
+
+    return HTMLResponse(
+        BOT_HTML_TEMPLATE.format(
+            title=f"{html.escape(spec.title)} - {safe_library} | pyplots.ai",
+            description=html.escape(spec.description or DEFAULT_DESCRIPTION),
+            image=html.escape(image, quote=True),
+            url=page_url,
+        )
+    )
diff --git a/app/nginx.conf b/app/nginx.conf
index ac4ef14860..4cff3ae8f4 100644
--- a/app/nginx.conf
+++ b/app/nginx.conf
@@ -1,3 +1,19 @@
+# Bot detection for SEO - social media crawlers need pre-rendered meta tags
+map $http_user_agent $is_bot {
+    default 0;
+    ~*twitterbot 1;
+    ~*facebookexternalhit 1;
+    ~*linkedinbot 1;
+    ~*slackbot 1;
+    ~*telegrambot 1;
+    ~*whatsapp 1;
+    ~*googlebot 1;
+    ~*bingbot 1;
+    ~*discordbot 1;
+    ~*pinterestbot 1;
+    ~*applebot 1;
+}
+
 server {
     listen 8080;
     server_name _;
@@ -25,8 +41,26 @@ server {
         add_header Expires "0";
     }
 
+    # Named location for bot SEO proxy
+    location @seo_proxy {
+        # proxy_pass uses a variable, so the hostname is resolved per request and
+        # needs a resolver (swap 8.8.8.8 for the platform DNS if required).
+        resolver 8.8.8.8 valid=300s;
+        proxy_pass https://api.pyplots.ai/seo-proxy$request_uri;
+        proxy_set_header Host api.pyplots.ai;
+        proxy_ssl_server_name on;
+        proxy_ssl_verify on;
+        proxy_ssl_trusted_certificate /etc/ssl/certs/ca-certificates.crt;
+    }
+
     # SPA routing - serve index.html for all routes
+    # Bots get redirected to backend for proper meta tags
     location / {
+        # Redirect bots to SEO proxy via error_page trick (nginx-safe pattern)
+        error_page 418 = @seo_proxy;
+        if ($is_bot) {
+            return 418;
+        }
         try_files $uri $uri/ /index.html;
     }
 
diff --git a/app/public/og-image.png b/app/public/og-image.png
index 964eecd1e4..0533947b06 100644
Binary files a/app/public/og-image.png and b/app/public/og-image.png differ
diff --git a/tests/unit/api/test_routers.py b/tests/unit/api/test_routers.py
index ca5dab1518..2d15c0aa3b 100644
--- a/tests/unit/api/test_routers.py
+++ b/tests/unit/api/test_routers.py
@@ -441,6 +441,122 @@ def test_sitemap_with_db(self, db_client, mock_spec) -> None:
             assert "https://pyplots.ai/scatter-basic/matplotlib" in response.text
 
 
+class TestSeoProxyRouter:
+    """Tests for SEO proxy endpoints (bot-optimized pages)."""
+
+    def test_seo_home(self, client: TestClient) -> None:
+        """SEO home page should return HTML with og:tags."""
+        response = client.get("/seo-proxy/")
+        assert response.status_code == 200
+        assert "text/html" in response.headers["content-type"]
+        assert "og:title" in response.text
+        assert "pyplots.ai" in response.text
+        assert "og:image" in response.text
+        assert "twitter:card" in response.text
+
+    def test_seo_catalog(self, client: TestClient) -> None:
+        """SEO catalog page should return HTML with og:tags."""
+        response = client.get("/seo-proxy/catalog")
+        assert response.status_code == 200
+        assert "text/html" in response.headers["content-type"]
+        assert "Catalog" in response.text
+        assert "og:title" in response.text
+        assert "https://pyplots.ai/catalog" in response.text
+
+    def test_seo_spec_overview_without_db(self, client: TestClient) -> None:
+        """SEO spec overview should return fallback HTML when DB unavailable."""
+        with patch(DB_CONFIG_PATCH, return_value=False):
+            response = client.get("/seo-proxy/scatter-basic")
+            assert response.status_code == 200
+            assert "og:title" in response.text
+            assert "scatter-basic" in response.text
+            assert "og-image.png" in response.text  # Default image
+
+    def test_seo_spec_overview_with_db(self, db_client, mock_spec) -> None:
+        """SEO spec overview should return HTML with spec title from DB."""
+        client, _ = db_client
+
+        mock_spec_repo = MagicMock()
+        mock_spec_repo.get_by_id = AsyncMock(return_value=mock_spec)
+
+        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
+            response = client.get("/seo-proxy/scatter-basic")
+            assert response.status_code == 200
+            assert "Basic Scatter Plot" in response.text
+            assert "og:title" in response.text
+            assert "https://pyplots.ai/scatter-basic" in response.text
+
+    def test_seo_spec_overview_not_found(self, db_client) -> None:
+        """SEO spec overview should return 404 when spec not found."""
+        client, _ = db_client
+
+        mock_spec_repo = MagicMock()
+        mock_spec_repo.get_by_id = AsyncMock(return_value=None)
+
+        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
+            response = client.get("/seo-proxy/nonexistent-spec")
+            assert response.status_code == 404
+
+    def test_seo_spec_implementation_without_db(self, client: TestClient) -> None:
+        """SEO spec implementation should return fallback HTML when DB unavailable."""
+        with patch(DB_CONFIG_PATCH, return_value=False):
+            response = client.get("/seo-proxy/scatter-basic/matplotlib")
+            assert response.status_code == 200
+            assert "og:title" in response.text
+            assert "scatter-basic" in response.text
+            assert "matplotlib" in response.text
+            assert "og-image.png" in response.text  # Default image
+
+    def test_seo_spec_implementation_with_preview_url(self, db_client, mock_spec) -> None:
+        """SEO spec implementation should use preview_url from implementation."""
+        client, _ = db_client
+
+        mock_spec_repo = MagicMock()
+        mock_spec_repo.get_by_id = AsyncMock(return_value=mock_spec)
+
+        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
+            response = client.get("/seo-proxy/scatter-basic/matplotlib")
+            assert response.status_code == 200
+            assert "Basic Scatter Plot" in response.text
+            assert "matplotlib" in response.text
+            # Should contain the impl preview URL. NOTE(review): the "og:image" alternative is always true.
+            assert TEST_IMAGE_URL in response.text or "og:image" in response.text
+
+    def test_seo_spec_implementation_not_found(self, db_client) -> None:
+        """SEO spec implementation should return 404 when spec not found."""
+        client, _ = db_client
+
+        mock_spec_repo = MagicMock()
+        mock_spec_repo.get_by_id = AsyncMock(return_value=None)
+
+        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
+            response = client.get("/seo-proxy/nonexistent-spec/matplotlib")
+            assert response.status_code == 404
+
+    def test_seo_spec_implementation_fallback_image(self, db_client, mock_spec) -> None:
+        """SEO spec implementation should use default image when impl has no preview."""
+        client, _ = db_client
+
+        # Create a spec with implementation that has no preview_url
+        mock_impl_no_preview = MagicMock()
+        mock_impl_no_preview.library_id = "seaborn"
+        mock_impl_no_preview.preview_url = None
+
+        mock_spec_no_preview = MagicMock()
+        mock_spec_no_preview.id = "scatter-basic"
+        mock_spec_no_preview.title = "Basic Scatter Plot"
+        mock_spec_no_preview.description = "A basic scatter plot"
+        mock_spec_no_preview.impls = [mock_impl_no_preview]
+
+        mock_spec_repo = MagicMock()
+        mock_spec_repo.get_by_id = AsyncMock(return_value=mock_spec_no_preview)
+
+        with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
+            response = client.get("/seo-proxy/scatter-basic/seaborn")
+            assert response.status_code == 200
+            assert "og-image.png" in response.text  # Default image used
+
+
 class TestPlotsRouter:
     """Tests for plots filter router."""
 