Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions api/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Server-side Plausible Analytics for og:image tracking.

Tracks og:image requests from social media bots (Twitter, WhatsApp, etc.)
since bots don't execute JavaScript and can't be tracked client-side.

Uses fire-and-forget pattern to avoid delaying responses.
"""

import asyncio
import logging

import httpx
from fastapi import Request


logger = logging.getLogger(__name__)

PLAUSIBLE_ENDPOINT = "https://plausible.io/api/event"
DOMAIN = "pyplots.ai"

# All platforms from nginx.conf bot detection (27 total).
# Maps the platform name reported to analytics -> lowercase substring that
# identifies the platform's crawler in a User-Agent header.
PLATFORM_PATTERNS: dict[str, str] = {
    # Social Media
    "twitter": "twitterbot",
    "facebook": "facebookexternalhit",
    "linkedin": "linkedinbot",
    "pinterest": "pinterestbot",
    "reddit": "redditbot",
    "tumblr": "tumblr",
    "mastodon": "mastodon",
    # Messaging Apps
    "slack": "slackbot",
    "discord": "discordbot",
    "telegram": "telegrambot",
    "whatsapp": "whatsapp",
    "signal": "signal",
    "viber": "viber",
    "skype": "skypeuripreview",
    "teams": "microsoft teams",
    "snapchat": "snapchat",
    # Search Engines
    "google": "googlebot",
    "bing": "bingbot",
    "yandex": "yandexbot",
    "duckduckgo": "duckduckbot",
    "baidu": "baiduspider",
    "apple": "applebot",
    # Link Preview Services
    "embedly": "embedly",
    "quora": "quora link preview",
    "outbrain": "outbrain",
    "rogerbot": "rogerbot",
    "showyoubot": "showyoubot",
}


def detect_platform(user_agent: str) -> str:
    """Detect platform from User-Agent string.

    Matching is a case-insensitive substring search against
    ``PLATFORM_PATTERNS``. When several patterns occur in the same
    User-Agent, the one that appears *earliest* in the string wins. Some
    crawlers name another bot as a compatibility hint after their own token
    (e.g. ``"TelegramBot (like TwitterBot)"``); picking the earliest match
    attributes such requests to the crawler's own name instead of whichever
    pattern happens to come first in the dict.

    Args:
        user_agent: The User-Agent header value

    Returns:
        Platform name (e.g., 'twitter', 'whatsapp') or 'unknown'
    """
    ua_lower = user_agent.lower()
    best_platform = "unknown"
    best_pos = len(ua_lower)  # sentinel: larger than any real match position
    for platform, pattern in PLATFORM_PATTERNS.items():
        pos = ua_lower.find(pattern)
        # Strict "<" keeps dict order as the tie-breaker for equal positions.
        if 0 <= pos < best_pos:
            best_platform, best_pos = platform, pos
    return best_platform


async def _send_plausible_event(
    user_agent: str,
    client_ip: str,
    name: str,
    url: str,
    props: dict[str, str],
) -> None:
    """Internal: Send event to Plausible (called as background task).

    Best-effort: any failure (network error, timeout, HTTP error status) is
    logged at debug level and otherwise ignored, so tracking can never break
    the request that triggered it.

    Args:
        user_agent: Original User-Agent header (forwarded as-is)
        client_ip: Client IP for geolocation (forwarded as X-Forwarded-For)
        name: Event name
        url: Page URL
        props: Event properties
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                PLAUSIBLE_ENDPOINT,
                headers={"User-Agent": user_agent, "X-Forwarded-For": client_ip, "Content-Type": "application/json"},
                json={"name": name, "url": url, "domain": DOMAIN, "props": props},
            )
        # A 4xx/5xx reply does not raise on its own; surface it in the debug
        # log so dropped events are not completely invisible.
        if response.status_code >= 400:
            logger.debug("Plausible tracking failed (non-critical): HTTP %s", response.status_code)
    except Exception as e:  # deliberate catch-all: tracking is non-critical
        logger.debug("Plausible tracking failed (non-critical): %s", e)


# Strong references to in-flight tracking tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes. Tasks remove themselves when done.
_background_tasks: set[asyncio.Task[None]] = set()


def track_og_image(
    request: Request,
    page: str,
    spec: str | None = None,
    library: str | None = None,
    filters: dict[str, str] | None = None,
) -> None:
    """Track og:image request (fire-and-forget).

    Sends event to Plausible in background without blocking response.
    Must be called while an event loop is running (e.g. from an async
    route handler), because the send is scheduled with
    ``asyncio.create_task``.

    Args:
        request: FastAPI request for headers
        page: Page type ('home', 'catalog', 'spec_overview', 'spec_detail')
        spec: Spec ID (optional)
        library: Library ID (optional)
        filters: Query params for filtered home page (e.g., {'lib': 'plotly', 'dom': 'statistics'})
    """
    user_agent = request.headers.get("user-agent", "")
    # Prefer the proxy-supplied address; fall back to the direct peer.
    client_ip = request.headers.get("x-forwarded-for", request.client.host if request.client else "")
    platform = detect_platform(user_agent)

    # Build URL based on page type
    if page == "home":
        url = "https://pyplots.ai/"
    elif page == "catalog":
        url = "https://pyplots.ai/catalog"
    elif spec is not None and library:
        url = f"https://pyplots.ai/{spec}/{library}"
    elif spec is not None:
        url = f"https://pyplots.ai/{spec}"
    else:
        # Fallback: missing spec for a spec-based page
        url = "https://pyplots.ai/"

    props: dict[str, str] = {"page": page, "platform": platform}
    if spec:
        props["spec"] = spec
    if library:
        props["library"] = library
    if filters:
        # Add each filter as separate prop (e.g., filter_lib, filter_dom)
        # This handles comma-separated values like lib=plotly,matplotlib
        for key, value in filters.items():
            props[f"filter_{key}"] = value

    # Fire-and-forget: schedule the send without awaiting it, but hold a
    # reference until it completes so it cannot be garbage-collected mid-flight.
    task = asyncio.create_task(_send_plausible_event(user_agent, client_ip, "og_image_view", url, props))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fire-and-forget task created without retaining a reference, which can lead to warnings and potential task cleanup issues. The created task can be garbage collected while still running, and exceptions in the task won't be logged. Consider using asyncio.create_task with a strong reference stored in a set, or use FastAPI's BackgroundTasks for better task lifecycle management.

Copilot uses AI. Check for mistakes.
57 changes: 54 additions & 3 deletions api/routers/og_images.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,64 @@
"""OG Image endpoints for branded social media preview images."""

import asyncio
from pathlib import Path

import httpx
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import Response
from sqlalchemy.ext.asyncio import AsyncSession

from api.analytics import track_og_image
from api.cache import cache_key, get_cache, set_cache
from api.dependencies import optional_db
from core.database import SpecRepository
from core.images import create_branded_og_image, create_og_collage


# Static og:image (loaded lazily on first request, then cached in memory)
_STATIC_OG_IMAGE: bytes | None = None


def _get_static_og_image() -> bytes:
    """Return the bytes of the static og-image.png.

    The file is read from ``app/public/og-image.png`` (three directory
    levels above this module) on the first call and kept in the
    module-level ``_STATIC_OG_IMAGE`` cache for subsequent calls.

    Raises:
        HTTPException: 500 if the image file does not exist.
    """
    global _STATIC_OG_IMAGE
    # Fast path: already cached by an earlier call.
    if _STATIC_OG_IMAGE is not None:
        return _STATIC_OG_IMAGE

    image_path = Path(__file__).parent.parent.parent / "app" / "public" / "og-image.png"
    try:
        _STATIC_OG_IMAGE = image_path.read_bytes()
    except FileNotFoundError as exc:
        raise HTTPException(status_code=500, detail="Static OG image not found") from exc
    return _STATIC_OG_IMAGE


router = APIRouter(prefix="/og", tags=["og-images"])


@router.get("/home.png")
async def get_home_og_image(request: Request) -> Response:
    """Serve the static OG image for the home page and record a tracking event.

    Any query parameters on the request (e.g., ?lib=plotly&dom=statistics)
    are forwarded to the tracker as filters, so shares of filtered home-page
    URLs can be told apart.
    """
    # Only pass a filters dict when the request actually carries query params.
    query_params = request.query_params
    filters = dict(query_params) if query_params else None
    track_og_image(request, page="home", filters=filters)

    cache_headers = {"Cache-Control": "public, max-age=86400"}
    return Response(content=_get_static_og_image(), media_type="image/png", headers=cache_headers)


@router.get("/catalog.png")
async def get_catalog_og_image(request: Request) -> Response:
    """Serve the static OG image for the catalog page and record a tracking event."""
    track_og_image(request, page="catalog")

    cache_headers = {"Cache-Control": "public, max-age=86400"}
    return Response(content=_get_static_og_image(), media_type="image/png", headers=cache_headers)
Comment on lines +37 to +59
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing test coverage for new endpoints. The get_home_og_image and get_catalog_og_image endpoints lack unit tests to verify tracking integration, filter parameter handling, and proper response headers. Given the comprehensive test coverage in this repository, tests should be added to the TestOgImagesRouter class.

Copilot uses AI. Check for mistakes.


async def _fetch_image(url: str) -> bytes:
"""Fetch an image from a URL."""
async with httpx.AsyncClient(timeout=30.0) as client:
Expand All @@ -26,12 +69,15 @@ async def _fetch_image(url: str) -> bytes:

@router.get("/{spec_id}/{library}.png")
async def get_branded_impl_image(
spec_id: str, library: str, db: AsyncSession | None = Depends(optional_db)
spec_id: str, library: str, request: Request, db: AsyncSession | None = Depends(optional_db)
) -> Response:
"""Get a branded OG image for an implementation.

Returns a 1200x630 PNG with pyplots.ai header and the plot image.
"""
# Track og:image request (fire-and-forget)
track_og_image(request, page="spec_detail", spec=spec_id, library=library)

# Check cache first
key = cache_key("og", spec_id, library)
cached = get_cache(key)
Expand Down Expand Up @@ -70,12 +116,17 @@ async def get_branded_impl_image(


@router.get("/{spec_id}.png")
async def get_spec_collage_image(spec_id: str, db: AsyncSession | None = Depends(optional_db)) -> Response:
async def get_spec_collage_image(
spec_id: str, request: Request, db: AsyncSession | None = Depends(optional_db)
) -> Response:
"""Get a collage OG image for a spec (showing top 6 implementations by quality).

Returns a 1200x630 PNG with pyplots.ai branding and a 2x3 grid of implementations,
sorted by quality_score descending.
"""
# Track og:image request (fire-and-forget)
track_og_image(request, page="spec_overview", spec=spec_id)

# Check cache first
key = cache_key("og", spec_id, "collage")
cached = get_cache(key)
Expand Down
33 changes: 21 additions & 12 deletions api/routers/seo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import html

from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse, Response
from sqlalchemy.ext.asyncio import AsyncSession

Expand Down Expand Up @@ -36,7 +36,9 @@
<body><h1>{title}</h1><p>{description}</p></body>
</html>"""

DEFAULT_IMAGE = "https://pyplots.ai/og-image.png"
# Route through API for tracking (was: pyplots.ai/og-image.png)
DEFAULT_HOME_IMAGE = "https://api.pyplots.ai/og/home.png"
DEFAULT_CATALOG_IMAGE = "https://api.pyplots.ai/og/catalog.png"
DEFAULT_DESCRIPTION = "library-agnostic, ai-powered python plotting."


Expand Down Expand Up @@ -89,12 +91,19 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):


@router.get("/seo-proxy/")
async def seo_home():
"""Bot-optimized home page with correct og:tags."""
async def seo_home(request: Request):
"""Bot-optimized home page with correct og:tags.

Passes query params (e.g., ?lib=plotly&dom=statistics) to og:image URL for tracking.
"""
# Pass filter params to og:image URL for tracking shared filtered URLs
# Use html.escape to prevent XSS via query params
query_string = html.escape(str(request.query_params), quote=True) if request.query_params else ""
image_url = f"{DEFAULT_HOME_IMAGE}?{query_string}" if query_string else DEFAULT_HOME_IMAGE
page_url = f"https://pyplots.ai/?{query_string}" if query_string else "https://pyplots.ai/"

return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title="pyplots.ai", description=DEFAULT_DESCRIPTION, image=DEFAULT_IMAGE, url="https://pyplots.ai/"
)
BOT_HTML_TEMPLATE.format(title="pyplots.ai", description=DEFAULT_DESCRIPTION, image=image_url, url=page_url)
)


Expand All @@ -105,7 +114,7 @@ async def seo_catalog():
BOT_HTML_TEMPLATE.format(
title="Catalog | pyplots.ai",
description="Browse all Python plotting specifications alphabetically. Find matplotlib, seaborn, plotly, bokeh, altair examples.",
image=DEFAULT_IMAGE,
image=DEFAULT_CATALOG_IMAGE,
url="https://pyplots.ai/catalog",
)
)
Expand All @@ -120,7 +129,7 @@ async def seo_spec_overview(spec_id: str, db: AsyncSession | None = Depends(opti
BOT_HTML_TEMPLATE.format(
title=f"{spec_id} | pyplots.ai",
description=DEFAULT_DESCRIPTION,
image=DEFAULT_IMAGE,
image=DEFAULT_HOME_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}",
)
)
Expand All @@ -132,7 +141,7 @@ async def seo_spec_overview(spec_id: str, db: AsyncSession | None = Depends(opti

# Use collage og:image if implementations exist, otherwise default
has_previews = any(i.preview_url for i in spec.impls)
image = f"https://api.pyplots.ai/og/{spec_id}.png" if has_previews else DEFAULT_IMAGE
image = f"https://api.pyplots.ai/og/{spec_id}.png" if has_previews else DEFAULT_HOME_IMAGE

return HTMLResponse(
BOT_HTML_TEMPLATE.format(
Expand All @@ -153,7 +162,7 @@ async def seo_spec_implementation(spec_id: str, library: str, db: AsyncSession |
BOT_HTML_TEMPLATE.format(
title=f"{html.escape(spec_id)} - {html.escape(library)} | pyplots.ai",
description=DEFAULT_DESCRIPTION,
image=DEFAULT_IMAGE,
image=DEFAULT_HOME_IMAGE,
url=f"https://pyplots.ai/{html.escape(spec_id)}/{html.escape(library)}",
)
)
Expand All @@ -166,7 +175,7 @@ async def seo_spec_implementation(spec_id: str, library: str, db: AsyncSession |
# Find the implementation for this library
impl = next((i for i in spec.impls if i.library_id == library), None)
# Use branded og:image endpoint if implementation has preview
image = f"https://api.pyplots.ai/og/{spec_id}/{library}.png" if impl and impl.preview_url else DEFAULT_IMAGE
image = f"https://api.pyplots.ai/og/{spec_id}/{library}.png" if impl and impl.preview_url else DEFAULT_HOME_IMAGE

return HTMLResponse(
BOT_HTML_TEMPLATE.format(
Expand Down
Loading