diff --git a/api/analytics.py b/api/analytics.py
new file mode 100644
index 0000000000..f5babd98d5
--- /dev/null
+++ b/api/analytics.py
@@ -0,0 +1,148 @@
+"""Server-side Plausible Analytics for og:image tracking.
+
+Tracks og:image requests from social media bots (Twitter, WhatsApp, etc.)
+since bots don't execute JavaScript and can't be tracked client-side.
+
+Uses fire-and-forget pattern to avoid delaying responses.
+"""
+
+import asyncio
+import logging
+
+import httpx
+from fastapi import Request
+
+
+logger = logging.getLogger(__name__)
+
+PLAUSIBLE_ENDPOINT = "https://plausible.io/api/event"
+DOMAIN = "pyplots.ai"
+
+# Strong references to in-flight tracking tasks. The event loop only keeps weak
+# references, so an unreferenced task may be garbage-collected before it finishes.
+_background_tasks: set[asyncio.Task[None]] = set()
+
+# All platforms from nginx.conf bot detection (27 total)
+PLATFORM_PATTERNS = {
+    # Social Media
+    "twitter": "twitterbot",
+    "facebook": "facebookexternalhit",
+    "linkedin": "linkedinbot",
+    "pinterest": "pinterestbot",
+    "reddit": "redditbot",
+    "tumblr": "tumblr",
+    "mastodon": "mastodon",
+    # Messaging Apps
+    "slack": "slackbot",
+    "discord": "discordbot",
+    "telegram": "telegrambot",
+    "whatsapp": "whatsapp",
+    "signal": "signal",
+    "viber": "viber",
+    "skype": "skypeuripreview",
+    "teams": "microsoft teams",
+    "snapchat": "snapchat",
+    # Search Engines
+    "google": "googlebot",
+    "bing": "bingbot",
+    "yandex": "yandexbot",
+    "duckduckgo": "duckduckbot",
+    "baidu": "baiduspider",
+    "apple": "applebot",
+    # Link Preview Services
+    "embedly": "embedly",
+    "quora": "quora link preview",
+    "outbrain": "outbrain",
+    "rogerbot": "rogerbot",
+    "showyoubot": "showyoubot",
+}
+
+
+def detect_platform(user_agent: str) -> str:
+    """Detect platform from User-Agent string.
+
+    Args:
+        user_agent: The User-Agent header value
+
+    Returns:
+        Platform name (e.g., 'twitter', 'whatsapp') or 'unknown'
+    """
+    ua_lower = user_agent.lower()
+    for platform, pattern in PLATFORM_PATTERNS.items():
+        if pattern in ua_lower:
+            return platform
+    return "unknown"
+
+
+async def _send_plausible_event(user_agent: str, client_ip: str, name: str, url: str, props: dict) -> None:
+    """Internal: Send event to Plausible (called as background task).
+
+    Args:
+        user_agent: Original User-Agent header
+        client_ip: Client IP for geolocation
+        name: Event name
+        url: Page URL
+        props: Event properties
+    """
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            await client.post(
+                PLAUSIBLE_ENDPOINT,
+                headers={"User-Agent": user_agent, "X-Forwarded-For": client_ip, "Content-Type": "application/json"},
+                json={"name": name, "url": url, "domain": DOMAIN, "props": props},
+            )
+    except Exception as e:
+        logger.debug("Plausible tracking failed (non-critical): %s", e)
+
+
+def track_og_image(
+    request: Request,
+    page: str,
+    spec: str | None = None,
+    library: str | None = None,
+    filters: dict[str, str] | None = None,
+) -> None:
+    """Track og:image request (fire-and-forget).
+
+    Sends event to Plausible in background without blocking response.
+
+    Args:
+        request: FastAPI request for headers
+        page: Page type ('home', 'catalog', 'spec_overview', 'spec_detail')
+        spec: Spec ID (optional)
+        library: Library ID (optional)
+        filters: Query params for filtered home page (e.g., {'lib': 'plotly', 'dom': 'statistics'})
+    """
+    user_agent = request.headers.get("user-agent", "")
+    client_ip = request.headers.get("x-forwarded-for", request.client.host if request.client else "")
+    platform = detect_platform(user_agent)
+
+    # Build URL based on page type
+    if page == "home":
+        url = "https://pyplots.ai/"
+    elif page == "catalog":
+        url = "https://pyplots.ai/catalog"
+    elif spec is not None and library:
+        url = f"https://pyplots.ai/{spec}/{library}"
+    elif spec is not None:
+        url = f"https://pyplots.ai/{spec}"
+    else:
+        # Fallback: missing spec for a spec-based page
+        url = "https://pyplots.ai/"
+
+    props: dict[str, str] = {"page": page, "platform": platform}
+    if spec:
+        props["spec"] = spec
+    if library:
+        props["library"] = library
+    if filters:
+        # Add each filter as separate prop (e.g., filter_lib, filter_dom)
+        # This handles comma-separated values like lib=plotly,matplotlib
+        for key, value in filters.items():
+            props[f"filter_{key}"] = value
+
+    # Fire-and-forget: create task without awaiting. The event loop holds only a
+    # weak reference to tasks, so keep a strong one until the task has finished.
+    task = asyncio.create_task(_send_plausible_event(user_agent, client_ip, "og_image_view", url, props))
+    _background_tasks.add(task)
+    task.add_done_callback(_background_tasks.discard)
diff --git a/api/routers/og_images.py b/api/routers/og_images.py
index 3c95087223..df67b3f640 100644
--- a/api/routers/og_images.py
+++ b/api/routers/og_images.py
@@ -1,21 +1,64 @@
 """OG Image endpoints for branded social media preview images."""
 
 import asyncio
+from pathlib import Path
 
 import httpx
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import Response
 from sqlalchemy.ext.asyncio import AsyncSession
 
+from api.analytics import track_og_image
 from api.cache import cache_key, get_cache, set_cache
 from api.dependencies import optional_db
 from core.database import SpecRepository
 from core.images import create_branded_og_image, create_og_collage
 
 
+# Static og:image (loaded once at startup)
+_STATIC_OG_IMAGE: bytes | None = None
+
+
+def _get_static_og_image() -> bytes:
+    """Load static og-image.png (cached in memory)."""
+    global _STATIC_OG_IMAGE
+    if _STATIC_OG_IMAGE is None:
+        path = Path(__file__).parent.parent.parent / "app" / "public" / "og-image.png"
+        try:
+            _STATIC_OG_IMAGE = path.read_bytes()
+        except FileNotFoundError as exc:
+            raise HTTPException(status_code=500, detail="Static OG image not found") from exc
+    return _STATIC_OG_IMAGE
+
+
 router = APIRouter(prefix="/og", tags=["og-images"])
 
 
+@router.get("/home.png")
+async def get_home_og_image(request: Request) -> Response:
+    """OG image for home page with tracking.
+
+    Supports filter params (e.g., ?lib=plotly&dom=statistics) for tracking shared filtered URLs.
+    """
+    # Capture filter params for tracking (e.g., ?lib=plotly&dom=statistics)
+    filters = dict(request.query_params) if request.query_params else None
+    track_og_image(request, page="home", filters=filters)
+
+    return Response(
+        content=_get_static_og_image(), media_type="image/png", headers={"Cache-Control": "public, max-age=86400"}
+    )
+
+
+@router.get("/catalog.png")
+async def get_catalog_og_image(request: Request) -> Response:
+    """OG image for catalog page with tracking."""
+    track_og_image(request, page="catalog")
+
+    return Response(
+        content=_get_static_og_image(), media_type="image/png", headers={"Cache-Control": "public, max-age=86400"}
+    )
+
+
 async def _fetch_image(url: str) -> bytes:
     """Fetch an image from a URL."""
     async with httpx.AsyncClient(timeout=30.0) as client:
@@ -26,12 +69,15 @@ async def _fetch_image(url: str) -> bytes:
 
 @router.get("/{spec_id}/{library}.png")
 async def get_branded_impl_image(
-    spec_id: str, library: str, db: AsyncSession | None = Depends(optional_db)
+    spec_id: str, library: str, request: Request, db: AsyncSession | None = Depends(optional_db)
 ) -> Response:
     """Get a branded OG image for an implementation.
 
     Returns a 1200x630 PNG with pyplots.ai header and the plot image.
     """
+    # Track og:image request (fire-and-forget)
+    track_og_image(request, page="spec_detail", spec=spec_id, library=library)
+
     # Check cache first
     key = cache_key("og", spec_id, library)
     cached = get_cache(key)
@@ -70,12 +116,17 @@ async def get_branded_impl_image(
 
 
 @router.get("/{spec_id}.png")
-async def get_spec_collage_image(spec_id: str, db: AsyncSession | None = Depends(optional_db)) -> Response:
+async def get_spec_collage_image(
+    spec_id: str, request: Request, db: AsyncSession | None = Depends(optional_db)
+) -> Response:
     """Get a collage OG image for a spec (showing top 6 implementations by quality).
 
     Returns a 1200x630 PNG with pyplots.ai branding and a 2x3 grid of implementations,
     sorted by quality_score descending.
     """
+    # Track og:image request (fire-and-forget)
+    track_og_image(request, page="spec_overview", spec=spec_id)
+
     # Check cache first
     key = cache_key("og", spec_id, "collage")
     cached = get_cache(key)
diff --git a/api/routers/seo.py b/api/routers/seo.py
index d9b3df1eb4..de103eab39 100644
--- a/api/routers/seo.py
+++ b/api/routers/seo.py
@@ -2,7 +2,7 @@
 
 import html
 
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import HTMLResponse, Response
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -36,7 +36,9 @@
{description}