diff --git a/api/analytics.py b/api/analytics.py
new file mode 100644
index 0000000000..f5babd98d5
--- /dev/null
+++ b/api/analytics.py
@@ -0,0 +1,150 @@
+"""Server-side Plausible Analytics for og:image tracking.
+
+Tracks og:image requests from social media bots (Twitter, WhatsApp, etc.)
+since bots don't execute JavaScript and can't be tracked client-side.
+
+Uses fire-and-forget pattern to avoid delaying responses.
+"""
+
+import asyncio
+import logging
+
+import httpx
+from fastapi import Request
+
+
+logger = logging.getLogger(__name__)
+
+PLAUSIBLE_ENDPOINT = "https://plausible.io/api/event"
+DOMAIN = "pyplots.ai"
+
+# Strong references to in-flight tracking tasks. The event loop keeps only weak
+# references to tasks, so an unreferenced task may be garbage-collected mid-flight.
+_background_tasks: set[asyncio.Task] = set()
+
+# All platforms from nginx.conf bot detection (27 total)
+PLATFORM_PATTERNS = {
+    # Social Media
+    "twitter": "twitterbot",
+    "facebook": "facebookexternalhit",
+    "linkedin": "linkedinbot",
+    "pinterest": "pinterestbot",
+    "reddit": "redditbot",
+    "tumblr": "tumblr",
+    "mastodon": "mastodon",
+    # Messaging Apps
+    "slack": "slackbot",
+    "discord": "discordbot",
+    "telegram": "telegrambot",
+    "whatsapp": "whatsapp",
+    "signal": "signal",
+    "viber": "viber",
+    "skype": "skypeuripreview",
+    "teams": "microsoft teams",
+    "snapchat": "snapchat",
+    # Search Engines
+    "google": "googlebot",
+    "bing": "bingbot",
+    "yandex": "yandexbot",
+    "duckduckgo": "duckduckbot",
+    "baidu": "baiduspider",
+    "apple": "applebot",
+    # Link Preview Services
+    "embedly": "embedly",
+    "quora": "quora link preview",
+    "outbrain": "outbrain",
+    "rogerbot": "rogerbot",
+    "showyoubot": "showyoubot",
+}
+
+
+def detect_platform(user_agent: str) -> str:
+    """Detect platform from User-Agent string.
+
+    Args:
+        user_agent: The User-Agent header value
+
+    Returns:
+        Platform name (e.g., 'twitter', 'whatsapp') or 'unknown'
+    """
+    ua_lower = user_agent.lower()
+    for platform, pattern in PLATFORM_PATTERNS.items():
+        if pattern in ua_lower:
+            return platform
+    return "unknown"
+
+
+async def _send_plausible_event(user_agent: str, client_ip: str, name: str, url: str, props: dict) -> None:
+    """Internal: Send event to Plausible (called as background task).
+
+    Args:
+        user_agent: Original User-Agent header
+        client_ip: Client IP for geolocation
+        name: Event name
+        url: Page URL
+        props: Event properties
+    """
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            await client.post(
+                PLAUSIBLE_ENDPOINT,
+                headers={"User-Agent": user_agent, "X-Forwarded-For": client_ip, "Content-Type": "application/json"},
+                json={"name": name, "url": url, "domain": DOMAIN, "props": props},
+            )
+    except Exception as e:
+        # Best-effort: a tracking failure must never surface to the caller.
+        logger.debug("Plausible tracking failed (non-critical): %s", e)
+
+
+def track_og_image(
+    request: Request,
+    page: str,
+    spec: str | None = None,
+    library: str | None = None,
+    filters: dict[str, str] | None = None,
+) -> None:
+    """Track og:image request (fire-and-forget).
+
+    Sends event to Plausible in background without blocking response.
+    Must be called while an event loop is running (e.g. from an async endpoint).
+
+    Args:
+        request: FastAPI request for headers
+        page: Page type ('home', 'catalog', 'spec_overview', 'spec_detail')
+        spec: Spec ID (optional)
+        library: Library ID (optional)
+        filters: Query params for filtered home page (e.g., {'lib': 'plotly', 'dom': 'statistics'})
+    """
+    user_agent = request.headers.get("user-agent", "")
+    client_ip = request.headers.get("x-forwarded-for", request.client.host if request.client else "")
+    platform = detect_platform(user_agent)
+
+    # Build URL based on page type
+    if page == "home":
+        url = "https://pyplots.ai/"
+    elif page == "catalog":
+        url = "https://pyplots.ai/catalog"
+    elif spec is not None and library:
+        url = f"https://pyplots.ai/{spec}/{library}"
+    elif spec is not None:
+        url = f"https://pyplots.ai/{spec}"
+    else:
+        # Fallback: missing spec for a spec-based page
+        url = "https://pyplots.ai/"
+
+    props: dict[str, str] = {"page": page, "platform": platform}
+    if spec:
+        props["spec"] = spec
+    if library:
+        props["library"] = library
+    if filters:
+        # Add each filter as separate prop (e.g., filter_lib, filter_dom)
+        # This handles comma-separated values like lib=plotly,matplotlib
+        for key, value in filters.items():
+            props[f"filter_{key}"] = value
+
+    # Fire-and-forget: schedule without awaiting. Keep a strong reference until the
+    # task finishes so it cannot be garbage-collected before the event is sent.
+    task = asyncio.create_task(_send_plausible_event(user_agent, client_ip, "og_image_view", url, props))
+    _background_tasks.add(task)
+    task.add_done_callback(_background_tasks.discard)
diff --git a/api/routers/og_images.py b/api/routers/og_images.py
index 3c95087223..df67b3f640 100644
--- a/api/routers/og_images.py
+++ b/api/routers/og_images.py
@@ -1,21 +1,64 @@
 """OG Image endpoints for branded social media preview images."""
 
 import asyncio
+from pathlib import Path
 
 import httpx
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import Response
 from sqlalchemy.ext.asyncio import AsyncSession
 
+from api.analytics import track_og_image
 from api.cache import cache_key, get_cache, set_cache
 from api.dependencies import
optional_db
 from core.database import SpecRepository
 from core.images import create_branded_og_image, create_og_collage
 
 
+# Static og:image bytes (read lazily on first request, then kept in memory)
+_STATIC_OG_IMAGE: bytes | None = None
+
+
+def _get_static_og_image() -> bytes:
+    """Return og-image.png bytes, reading the file only on first use; raises HTTP 500 if it is missing."""
+    global _STATIC_OG_IMAGE
+    if _STATIC_OG_IMAGE is None:
+        path = Path(__file__).parent.parent.parent / "app" / "public" / "og-image.png"
+        try:
+            _STATIC_OG_IMAGE = path.read_bytes()
+        except FileNotFoundError as exc:
+            raise HTTPException(status_code=500, detail="Static OG image not found") from exc
+    return _STATIC_OG_IMAGE
+
+
 router = APIRouter(prefix="/og", tags=["og-images"])
 
 
+@router.get("/home.png")
+async def get_home_og_image(request: Request) -> Response:
+    """Serve the static home og:image and record an og_image_view event.
+
+    Supports filter params (e.g., ?lib=plotly&dom=statistics) for tracking shared filtered URLs.
+    """
+    # Forward every query param to tracking as a filter (None when there are no params)
+    filters = dict(request.query_params) if request.query_params else None
+    track_og_image(request, page="home", filters=filters)
+
+    return Response(
+        content=_get_static_og_image(), media_type="image/png", headers={"Cache-Control": "public, max-age=86400"}
+    )
+
+
+@router.get("/catalog.png")
+async def get_catalog_og_image(request: Request) -> Response:
+    """Serve the static og:image for the catalog page and record an og_image_view event."""
+    track_og_image(request, page="catalog")
+
+    return Response(
+        content=_get_static_og_image(), media_type="image/png", headers={"Cache-Control": "public, max-age=86400"}
+    )
+
+
 async def _fetch_image(url: str) -> bytes:
     """Fetch an image from a URL."""
     async with httpx.AsyncClient(timeout=30.0) as client:
@@ -26,12 +69,15 @@ async def _fetch_image(url: str) -> bytes:
 
 @router.get("/{spec_id}/{library}.png")
 async def get_branded_impl_image(
-    spec_id: str, library: str, db: AsyncSession | None = Depends(optional_db)
+    spec_id: str, library: str, request: Request, db: AsyncSession | None = Depends(optional_db)
 ) -> Response:
     """Get a branded OG image for an implementation.
 
     Returns a 1200x630 PNG with pyplots.ai header and the plot image.
     """
+    # Track before the cache lookup so cached responses are counted too (fire-and-forget)
+    track_og_image(request, page="spec_detail", spec=spec_id, library=library)
+
     # Check cache first
     key = cache_key("og", spec_id, library)
     cached = get_cache(key)
@@ -70,12 +116,17 @@ async def get_branded_impl_image(
 
 
 @router.get("/{spec_id}.png")
-async def get_spec_collage_image(spec_id: str, db: AsyncSession | None = Depends(optional_db)) -> Response:
+async def get_spec_collage_image(
+    spec_id: str, request: Request, db: AsyncSession | None = Depends(optional_db)
+) -> Response:
     """Get a collage OG image for a spec (showing top 6 implementations by quality).
 
     Returns a 1200x630 PNG with pyplots.ai branding and a 2x3 grid of
     implementations, sorted by quality_score descending.
     """
+    # Track before the cache lookup so cached responses are counted too (fire-and-forget)
+    track_og_image(request, page="spec_overview", spec=spec_id)
+
     # Check cache first
     key = cache_key("og", spec_id, "collage")
     cached = get_cache(key)
diff --git a/api/routers/seo.py b/api/routers/seo.py
index d9b3df1eb4..de103eab39 100644
--- a/api/routers/seo.py
+++ b/api/routers/seo.py
@@ -2,7 +2,7 @@
 
 import html
 
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import HTMLResponse, Response
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -36,7 +36,9 @@

{title}

{description}

"""
 
-DEFAULT_IMAGE = "https://pyplots.ai/og-image.png"
+# Route through API for tracking (was: pyplots.ai/og-image.png)
+DEFAULT_HOME_IMAGE = "https://api.pyplots.ai/og/home.png"
+DEFAULT_CATALOG_IMAGE = "https://api.pyplots.ai/og/catalog.png"
 DEFAULT_DESCRIPTION = "library-agnostic, ai-powered python plotting."
 
 
@@ -89,12 +91,19 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
 
 
 @router.get("/seo-proxy/")
-async def seo_home():
-    """Bot-optimized home page with correct og:tags."""
+async def seo_home(request: Request):
+    """Bot-optimized home page with correct og:tags.
+
+    Passes query params (e.g., ?lib=plotly&dom=statistics) to og:image URL for tracking.
+    """
+    # Pass filter params to og:image URL for tracking shared filtered URLs
+    # HTML-escape the re-encoded query string: it is interpolated into attribute values
+    query_string = html.escape(str(request.query_params), quote=True) if request.query_params else ""
+    image_url = f"{DEFAULT_HOME_IMAGE}?{query_string}" if query_string else DEFAULT_HOME_IMAGE
+    page_url = f"https://pyplots.ai/?{query_string}" if query_string else "https://pyplots.ai/"
+
     return HTMLResponse(
-        BOT_HTML_TEMPLATE.format(
-            title="pyplots.ai", description=DEFAULT_DESCRIPTION, image=DEFAULT_IMAGE, url="https://pyplots.ai/"
-        )
+        BOT_HTML_TEMPLATE.format(title="pyplots.ai", description=DEFAULT_DESCRIPTION, image=image_url, url=page_url)
     )
 
 
@@ -105,7 +114,7 @@ async def seo_catalog():
         BOT_HTML_TEMPLATE.format(
             title="Catalog | pyplots.ai",
             description="Browse all Python plotting specifications alphabetically. Find matplotlib, seaborn, plotly, bokeh, altair examples.",
-            image=DEFAULT_IMAGE,
+            image=DEFAULT_CATALOG_IMAGE,
             url="https://pyplots.ai/catalog",
         )
     )
@@ -120,7 +129,7 @@ async def seo_spec_overview(spec_id: str, db: AsyncSession | None = Depends(opti
         BOT_HTML_TEMPLATE.format(
-            title=f"{spec_id} | pyplots.ai",
+            title=f"{html.escape(spec_id)} | pyplots.ai",
             description=DEFAULT_DESCRIPTION,
-            image=DEFAULT_IMAGE,
+            image=DEFAULT_HOME_IMAGE,
             url=f"https://pyplots.ai/{html.escape(spec_id)}",
         )
     )
@@ -132,7 +141,7 @@ async def seo_spec_overview(spec_id: str, db: AsyncSession | None = Depends(opti
 
     # Use collage og:image if implementations exist, otherwise default
     has_previews = any(i.preview_url for i in spec.impls)
-    image = f"https://api.pyplots.ai/og/{spec_id}.png" if has_previews else DEFAULT_IMAGE
+    image = f"https://api.pyplots.ai/og/{spec_id}.png" if has_previews else DEFAULT_HOME_IMAGE
 
     return HTMLResponse(
         BOT_HTML_TEMPLATE.format(
@@ -153,7 +162,7 @@ async def seo_spec_implementation(spec_id: str, library: str, db: AsyncSession |
         BOT_HTML_TEMPLATE.format(
             title=f"{html.escape(spec_id)} - {html.escape(library)} | pyplots.ai",
             description=DEFAULT_DESCRIPTION,
-            image=DEFAULT_IMAGE,
+            image=DEFAULT_HOME_IMAGE,
             url=f"https://pyplots.ai/{html.escape(spec_id)}/{html.escape(library)}",
         )
     )
@@ -166,7 +175,7 @@ async def seo_spec_implementation(spec_id: str, library: str, db: AsyncSession |
     # Find the implementation for this library
     impl = next((i for i in spec.impls if i.library_id == library), None)
     # Use branded og:image endpoint if implementation has preview
-    image = f"https://api.pyplots.ai/og/{spec_id}/{library}.png" if impl and impl.preview_url else DEFAULT_IMAGE
+    image = f"https://api.pyplots.ai/og/{spec_id}/{library}.png" if impl and impl.preview_url else DEFAULT_HOME_IMAGE
 
     return HTMLResponse(
         BOT_HTML_TEMPLATE.format(
diff --git a/docs/architecture/plausible.md b/docs/architecture/plausible.md
index ed61c46172..63f06e25cc 100644
--- a/docs/architecture/plausible.md
+++ b/docs/architecture/plausible.md
@@
-116,6 +116,77 @@ https://pyplots.ai/{category}/{value}/{category}/{value}/... --- +## Server-Side og:image Tracking + +Social media bots (Twitter, WhatsApp, Teams, etc.) don't execute JavaScript, so og:image requests can only be tracked server-side. + +### Architecture + +All og:images are routed through the API for tracking: + +``` +Bot requests page → nginx detects bot → SEO proxy serves HTML with og:image URL + ↓ + https://api.pyplots.ai/og/{endpoint}.png + ↓ + track_og_image() → Plausible Events API + ↓ + Return image (fire-and-forget tracking) +``` + +**Implementation**: `api/analytics.py` (server-side Plausible tracking) + +### og:image Event + +| Event Name | Properties | Description | +|------------|------------|-------------| +| `og_image_view` | `page`, `platform`, `spec`?, `library`?, `filter_*`? | Bot requested og:image | + +### Properties + +| Property | Values | Description | +|----------|--------|-------------| +| `page` | `home`, `catalog`, `spec_overview`, `spec_detail` | Page type | +| `platform` | See list below | Detected platform from User-Agent | +| `spec` | Specification ID | Only for spec pages | +| `library` | Library ID | Only for detail pages | +| `filter_*` | Filter value | Dynamic props for filtered URLs (e.g., `filter_lib`, `filter_dom`) | + +### Platform Detection (27 platforms) + +**Social Media**: twitter, facebook, linkedin, pinterest, reddit, tumblr, mastodon + +**Messaging Apps**: slack, discord, telegram, whatsapp, signal, viber, skype, teams, snapchat + +**Search Engines**: google, bing, yandex, duckduckgo, baidu, apple + +**Link Preview Services**: embedly, quora, outbrain, rogerbot, showyoubot + +**Fallback**: unknown + +### API Endpoints + +| Endpoint | Description | Tracking | +|----------|-------------|----------| +| `/og/home.png` | Static og:image for home page | `page=home`, `filter_*` from query params | +| `/og/catalog.png` | Static og:image for catalog | `page=catalog` | +| `/og/{spec_id}.png` | Collage og:image for 
spec overview | `page=spec_overview`, `spec` | +| `/og/{spec_id}/{library}.png` | Branded og:image for implementation | `page=spec_detail`, `spec`, `library` | + +### Filter Tracking for Shared URLs + +When users share filtered URLs (e.g., `https://pyplots.ai/?lib=plotly&dom=statistics`), the filters are passed to the og:image endpoint: + +``` +og:image URL: https://api.pyplots.ai/og/home.png?lib=plotly,matplotlib&dom=statistics + ↓ +Tracked props: { page: "home", platform: "twitter", filter_lib: "plotly,matplotlib", filter_dom: "statistics" } +``` + +**Note**: Each filter category becomes a separate prop (`filter_lib`, `filter_dom`, etc.) to handle comma-separated values. + +--- + ## Plausible Dashboard Configuration ### Required Custom Properties @@ -128,16 +199,22 @@ To see event properties in Plausible dashboard, you **MUST** register them as cu | Property | Description | Used By Events | |----------|-------------|----------------| -| `spec` | Plot specification ID | `copy_code`, `download_image`, `navigate_to_spec`, `switch_library`, `select_implementation`, `back_to_overview`, `catalog_rotate`, `external_link`, `open_interactive` | -| `library` | Library name (matplotlib, seaborn, etc.) | `copy_code`, `download_image`, `navigate_to_spec`, `switch_library`, `select_implementation`, `back_to_overview`, `external_link`, `open_interactive`, `tab_click`, `tab_collapse` | +| `spec` | Plot specification ID | `copy_code`, `download_image`, `navigate_to_spec`, `switch_library`, `select_implementation`, `back_to_overview`, `catalog_rotate`, `external_link`, `open_interactive`, `og_image_view` | +| `library` | Library name (matplotlib, seaborn, etc.) 
| `copy_code`, `download_image`, `navigate_to_spec`, `switch_library`, `select_implementation`, `back_to_overview`, `external_link`, `open_interactive`, `tab_click`, `tab_collapse`, `og_image_view` | | `method` | Action method (card, image, tab, click, space, doubletap) | `copy_code`, `random` | -| `page` | Page context (home, spec_overview, spec_detail) | `copy_code`, `download_image` | +| `page` | Page context (home, catalog, spec_overview, spec_detail) | `copy_code`, `download_image`, `og_image_view` | +| `platform` | Bot/platform name (twitter, whatsapp, teams, etc.) | `og_image_view` | | `category` | Filter category (lib, plot, dom, feat, data, spec) | `search`, `random`, `filter_remove` | | `value` | Filter value | `random`, `filter_remove` | | `query` | Search query text | `search`, `search_no_results` | | `destination` | External link target (linkedin, github, stats) | `external_link` | | `tab` | Tab name (code, specification, implementation, quality) | `tab_click` | | `size` | Grid size (normal, compact) | `toggle_grid_size` | +| `filter_lib` | Library filter value (for og:image) | `og_image_view` | +| `filter_dom` | Domain filter value (for og:image) | `og_image_view` | +| `filter_plot` | Plot type filter value (for og:image) | `og_image_view` | +| `filter_feat` | Features filter value (for og:image) | `og_image_view` | +| `filter_data` | Data type filter value (for og:image) | `og_image_view` | ### Goals Configuration @@ -158,6 +235,7 @@ To see event properties in Plausible dashboard, you **MUST** register them as cu | `external_link` | Custom Event | Track outbound clicks | | `open_interactive` | Custom Event | Track interactive mode usage | | `tab_click` | Custom Event | Track tab interactions | +| `og_image_view` | Custom Event | Track og:image requests from social media bots | ### Funnels (Optional) @@ -239,6 +317,7 @@ User lands on pyplots.ai | `open_interactive` | `spec`, `library` | SpecPage.tsx | | `view_spec_overview` | `spec` | SpecPage.tsx | | 
`view_spec` | `spec`, `library` | SpecPage.tsx | +| `og_image_view` | `page`, `platform`, `spec`?, `library`?, `filter_*`? | api/analytics.py (server-side) | --- @@ -264,11 +343,30 @@ doubletap # Mobile double-tap ### `page` Values ``` -home # HomePage grid view +home # HomePage grid view (client) or og:image home endpoint (server) +catalog # CatalogPage (server og:image only) spec_overview # SpecPage showing all libraries spec_detail # SpecPage showing single library ``` +### `platform` Values (server-side og:image tracking only) +``` +# Social Media +twitter | facebook | linkedin | pinterest | reddit | tumblr | mastodon + +# Messaging Apps +slack | discord | telegram | whatsapp | signal | viber | skype | teams | snapchat + +# Search Engines +google | bing | yandex | duckduckgo | baidu | apple + +# Link Preview Services +embedly | quora | outbrain | rogerbot | showyoubot + +# Fallback +unknown +``` + ### `category` Values ``` lib # library filter @@ -341,15 +439,16 @@ window.plausible = function(...args) { console.log('Plausible:', args); }; - [x] Grid size toggle tracking (`toggle_grid_size`) - [x] Tab interaction events (`tab_click`, `tab_collapse`) - [x] External link events (`external_link`, `open_interactive`) +- [x] Server-side og:image tracking (`og_image_view`) with platform detection ### Plausible Dashboard Checklist -- [ ] Register all 10 custom properties (see table above) -- [ ] Create goals for key events +- [ ] Register all custom properties (see table above, including `platform` and `filter_*`) +- [ ] Create goals for key events (including `og_image_view`) - [ ] Set up funnels (optional) - [ ] Create custom dashboard widgets (optional) --- -**Last Updated**: 2025-01-05 -**Status**: Production-ready with full journey tracking +**Last Updated**: 2026-01-06 +**Status**: Production-ready with full journey tracking and server-side og:image analytics diff --git a/tests/unit/api/test_analytics.py b/tests/unit/api/test_analytics.py new file mode 100644 
index 0000000000..43900cc4e5
--- /dev/null
+++ b/tests/unit/api/test_analytics.py
@@ -0,0 +1,235 @@
+"""Tests for server-side Plausible analytics tracking."""
+
+from collections.abc import Iterator
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from api.analytics import PLATFORM_PATTERNS, detect_platform, track_og_image
+
+
+class TestDetectPlatform:
+    """Tests for platform detection from User-Agent."""
+
+    def test_detects_twitter(self) -> None:
+        """Should detect Twitter bot."""
+        assert detect_platform("Twitterbot/1.0") == "twitter"
+
+    def test_detects_whatsapp(self) -> None:
+        """Should detect WhatsApp."""
+        assert detect_platform("WhatsApp/2.21.4.22") == "whatsapp"
+
+    def test_detects_facebook(self) -> None:
+        """Should detect Facebook."""
+        assert detect_platform("facebookexternalhit/1.1") == "facebook"
+
+    def test_detects_linkedin(self) -> None:
+        """Should detect LinkedIn."""
+        assert detect_platform("LinkedInBot/1.0") == "linkedin"
+
+    def test_detects_slack(self) -> None:
+        """Should detect Slack."""
+        assert detect_platform("Slackbot-LinkExpanding 1.0") == "slack"
+
+    def test_detects_discord(self) -> None:
+        """Should detect Discord."""
+        assert detect_platform("Mozilla/5.0 (compatible; Discordbot/2.0)") == "discord"
+
+    def test_detects_telegram(self) -> None:
+        """Should detect Telegram."""
+        assert detect_platform("TelegramBot/1.0") == "telegram"
+
+    def test_detects_teams(self) -> None:
+        """Should detect Microsoft Teams."""
+        assert detect_platform("Mozilla/5.0 Microsoft Teams") == "teams"
+
+    def test_detects_google(self) -> None:
+        """Should detect Googlebot."""
+        assert detect_platform("Mozilla/5.0 (compatible; Googlebot/2.1)") == "google"
+
+    def test_returns_unknown_for_regular_browser(self) -> None:
+        """Should return unknown for regular browsers."""
+        assert detect_platform("Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0") == "unknown"
+
+    def test_returns_unknown_for_empty_string(self) -> None:
+        """Should return unknown for empty User-Agent."""
+        assert detect_platform("") == "unknown"
+
+    def test_case_insensitive(self) -> None:
+        """Should match case-insensitively."""
+        assert detect_platform("TWITTERBOT/1.0") == "twitter"
+        assert detect_platform("twitterbot/1.0") == "twitter"
+
+    def test_all_platforms_have_patterns(self) -> None:
+        """Should have 27 platform patterns defined."""
+        assert len(PLATFORM_PATTERNS) == 27
+
+
+class TestTrackOgImage:
+    """Tests for og:image tracking function."""
+
+    @pytest.fixture
+    def mock_request(self) -> MagicMock:
+        """Create a mock FastAPI request."""
+        request = MagicMock()
+        request.headers = {"user-agent": "Twitterbot/1.0", "x-forwarded-for": "1.2.3.4"}
+        request.client = MagicMock()
+        request.client.host = "127.0.0.1"
+        return request
+
+    @pytest.fixture
+    def mock_create_task(self) -> Iterator[MagicMock]:
+        """Patch task scheduling so the tests need no running event loop."""
+        with patch("api.analytics.asyncio.create_task") as create_task:
+            yield create_task
+
+    @pytest.fixture
+    def mock_send(self, mock_create_task: MagicMock) -> Iterator[MagicMock]:
+        """Patch the sender with a plain mock so its call arguments can be asserted.
+
+        MagicMock (not AsyncMock) is deliberate: it returns no coroutine, so nothing
+        is left un-awaited while create_task is mocked out.
+        """
+        with patch("api.analytics._send_plausible_event", new_callable=MagicMock) as send:
+            yield send
+
+    @staticmethod
+    def _sent_args(mock_send: MagicMock) -> tuple:
+        """Return (user_agent, client_ip, name, url, props) from the single send call."""
+        mock_send.assert_called_once()
+        return mock_send.call_args.args
+
+    def test_creates_async_task(
+        self, mock_request: MagicMock, mock_send: MagicMock, mock_create_task: MagicMock
+    ) -> None:
+        """Should schedule the send call as a background task."""
+        track_og_image(mock_request, page="home")
+        mock_create_task.assert_called_once_with(mock_send.return_value)
+
+    def test_home_page_url(self, mock_request: MagicMock, mock_send: MagicMock) -> None:
+        """Should build correct URL and props for home page."""
+        track_og_image(mock_request, page="home")
+        _, _, name, url, props = self._sent_args(mock_send)
+        assert name == "og_image_view"
+        assert url == "https://pyplots.ai/"
+        assert props == {"page": "home", "platform": "twitter"}
+
+    def test_catalog_page_url(self, mock_request: MagicMock, mock_send: MagicMock) -> None:
+        """Should build correct URL for catalog page."""
+        track_og_image(mock_request, page="catalog")
+        *_, url, props = self._sent_args(mock_send)
+        assert url == "https://pyplots.ai/catalog"
+        assert props == {"page": "catalog", "platform": "twitter"}
+
+    def test_spec_overview_url(self, mock_request: MagicMock, mock_send: MagicMock) -> None:
+        """Should build correct URL for spec overview."""
+        track_og_image(mock_request, page="spec_overview", spec="scatter-basic")
+        *_, url, props = self._sent_args(mock_send)
+        assert url == "https://pyplots.ai/scatter-basic"
+        assert props == {"page": "spec_overview", "platform": "twitter", "spec": "scatter-basic"}
+
+    def test_spec_detail_url(self, mock_request: MagicMock, mock_send: MagicMock) -> None:
+        """Should build correct URL for spec detail."""
+        track_og_image(mock_request, page="spec_detail", spec="scatter-basic", library="matplotlib")
+        *_, url, props = self._sent_args(mock_send)
+        assert url == "https://pyplots.ai/scatter-basic/matplotlib"
+        assert props == {
+            "page": "spec_detail",
+            "platform": "twitter",
+            "spec": "scatter-basic",
+            "library": "matplotlib",
+        }
+
+    def test_fallback_url_when_spec_none(self, mock_request: MagicMock, mock_send: MagicMock) -> None:
+        """Should fallback to home URL when spec is None for spec-based page."""
+        track_og_image(mock_request, page="spec_overview", spec=None)
+        *_, url, props = self._sent_args(mock_send)
+        assert url == "https://pyplots.ai/"
+        assert props == {"page": "spec_overview", "platform": "twitter"}
+
+    def test_includes_filter_props(self, mock_request: MagicMock, mock_send: MagicMock) -> None:
+        """Should include filter parameters in props."""
+        track_og_image(mock_request, page="home", filters={"lib": "plotly", "dom": "statistics"})
+        *_, props = self._sent_args(mock_send)
+        assert props["filter_lib"] == "plotly"
+        assert props["filter_dom"] == "statistics"
+
+    def test_uses_x_forwarded_for(self, mock_send: MagicMock) -> None:
+        """Should use X-Forwarded-For header for client IP."""
+        request = MagicMock()
+        request.headers = {"user-agent": "Twitterbot/1.0", "x-forwarded-for": "5.6.7.8"}
+        request.client = None
+
+        track_og_image(request, page="home")
+        _, client_ip, *_ = self._sent_args(mock_send)
+        assert client_ip == "5.6.7.8"
+
+    def test_fallback_to_client_host(self, mock_send: MagicMock) -> None:
+        """Should fallback to client.host when X-Forwarded-For not present."""
+        request = MagicMock()
+        request.headers = {"user-agent": "Twitterbot/1.0"}
+        request.client = MagicMock()
+        request.client.host = "10.0.0.1"
+
+        track_og_image(request, page="home")
+        _, client_ip, *_ = self._sent_args(mock_send)
+        assert client_ip == "10.0.0.1"
+
+    def test_handles_missing_client(self, mock_send: MagicMock) -> None:
+        """Should send an empty client IP when neither header nor client is available."""
+        request = MagicMock()
+        request.headers = {"user-agent": "Twitterbot/1.0"}
+        request.client = None
+
+        track_og_image(request, page="home")
+        _, client_ip, *_ = self._sent_args(mock_send)
+        assert client_ip == ""
+
+
+class TestSendPlausibleEvent:
+    """Tests for Plausible API call."""
+
+    @pytest.mark.asyncio
+    async def test_sends_correct_payload(self) -> None:
+        """Should send correct payload and forwarded headers to Plausible."""
+        from api.analytics import _send_plausible_event
+
+        with patch("api.analytics.httpx.AsyncClient") as mock_client_class:
+            mock_client = AsyncMock()
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+
+            await _send_plausible_event(
+                user_agent="Twitterbot/1.0",
+                client_ip="1.2.3.4",
+                name="og_image_view",
+                url="https://pyplots.ai/",
+                props={"page": "home", "platform": "twitter"},
+            )
+
+            mock_client.post.assert_called_once()
+            call_kwargs = mock_client.post.call_args[1]
+            assert call_kwargs["json"]["name"] == "og_image_view"
+            assert call_kwargs["json"]["domain"] == "pyplots.ai"
+            assert call_kwargs["json"]["url"] == "https://pyplots.ai/"
+            assert call_kwargs["json"]["props"] == {"page": "home", "platform": "twitter"}
+            assert call_kwargs["headers"]["User-Agent"] == "Twitterbot/1.0"
+            assert call_kwargs["headers"]["X-Forwarded-For"] == "1.2.3.4"
+
+    @pytest.mark.asyncio
+    async def test_handles_network_error(self) -> None:
+        """Should handle network errors gracefully."""
+        from api.analytics import _send_plausible_event
+
+        with patch("api.analytics.httpx.AsyncClient") as mock_client_class:
+            mock_client = AsyncMock()
+            mock_client.post.side_effect = Exception("Network error")
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+
+            # Should not raise
+            await _send_plausible_event(
+                user_agent="Twitterbot/1.0",
+                client_ip="1.2.3.4",
+                name="og_image_view",
+                url="https://pyplots.ai/",
+                props={},
+            )
diff --git a/tests/unit/api/test_routers.py b/tests/unit/api/test_routers.py
index d491aba81d..395d7aa2c4 100644
--- a/tests/unit/api/test_routers.py
+++ b/tests/unit/api/test_routers.py
@@ -470,7 +470,7 @@ def test_seo_spec_overview_without_db(self, client: TestClient) -> None:
             assert response.status_code == 200
             assert "og:title" in response.text
             assert "scatter-basic" in response.text
-            assert "og-image.png" in response.text  # Default image
+            assert "api.pyplots.ai/og/home.png" in response.text  # Default image via API
 
     def test_seo_spec_overview_with_db(self, db_client, mock_spec) -> None:
         """SEO spec overview should return HTML with spec title from DB."""
@@ -505,7 +505,7 @@ def test_seo_spec_implementation_without_db(self, client: TestClient) -> None:
             assert "og:title" in response.text
             assert "scatter-basic" in response.text
             assert "matplotlib" in response.text
-            assert "og-image.png" in response.text  # Default image
+            assert "api.pyplots.ai/og/home.png" in response.text  # Default image via API
 
     def test_seo_spec_implementation_with_preview_url(self, db_client, mock_spec) -> None:
         """SEO spec implementation should use preview_url from implementation."""
@@ -554,12 +554,58 @@ def test_seo_spec_implementation_fallback_image(self, db_client, mock_spec) -> N
         with patch("api.routers.seo.SpecRepository", return_value=mock_spec_repo):
             response = client.get("/seo-proxy/scatter-basic/seaborn")
             assert response.status_code == 200
-            assert "og-image.png" in response.text  # Default image used
+            assert "api.pyplots.ai/og/home.png" in response.text  # Default image via API
 
 
 class TestOgImagesRouter:
     """Tests for OG image generation endpoints."""
 
+    def test_get_home_og_image(self, client: TestClient) -> None:
+        """Should return static og:image for home page."""
+        with patch("api.routers.og_images.track_og_image"):
+            with patch("api.routers.og_images._get_static_og_image", return_value=b"fake-image"):
+                response = client.get("/og/home.png")
+                assert response.status_code == 200
+                assert response.headers["content-type"] == "image/png"
+                assert "max-age=86400" in response.headers["cache-control"]
+
+    def test_get_home_og_image_with_filters(self, client: TestClient) -> None:
+        """Should pass filter params to tracking."""
+        with patch("api.routers.og_images.track_og_image") as mock_track:
+            with patch("api.routers.og_images._get_static_og_image", return_value=b"fake-image"):
+                response = client.get("/og/home.png?lib=plotly&dom=statistics")
+                assert response.status_code == 200
+                mock_track.assert_called_once()
+                call_kwargs = mock_track.call_args[1]
+                assert call_kwargs["page"] == "home"
+                assert call_kwargs["filters"] == {"lib": "plotly", "dom": "statistics"}
+
+    def test_get_catalog_og_image(self, client: TestClient) -> None:
+        """Should return static og:image for catalog page."""
+        with patch("api.routers.og_images.track_og_image") as mock_track:
+            with patch("api.routers.og_images._get_static_og_image", return_value=b"fake-image"):
+                response = client.get("/og/catalog.png")
+                assert response.status_code == 200
+                assert response.headers["content-type"] == "image/png"
+                mock_track.assert_called_once()
+                call_kwargs = mock_track.call_args[1]
+                assert call_kwargs["page"] == "catalog"
+
+    def test_get_static_og_image_file_not_found(self, client: TestClient) -> None:
+        """Should return 500 when static image file not found."""
+        import api.routers.og_images as og_module
+
+        # Reset cached image
+        og_module._STATIC_OG_IMAGE = None
+
+        with patch("api.routers.og_images.track_og_image"):
+            with patch("pathlib.Path.read_bytes", side_effect=FileNotFoundError("not found")):
+                response = client.get("/og/home.png")
+                assert response.status_code == 500
+
+        # Reset for other tests
+        og_module._STATIC_OG_IMAGE = None
+
     def test_get_branded_impl_image_no_db(self, client: TestClient) -> None:
         """Should return 503 when DB not available."""
         with patch(DB_CONFIG_PATCH, return_value=False):