Skip to content

Commit cde3980

Browse files
feat(sitemap): enhance sitemap generation and caching
- Implement background sitemap refresh with database session
- Add caching for sitemap XML responses
- Update cache clearing logic for sitemap-related keys
- Adjust cache control headers for improved performance
1 parent 7bfad56 commit cde3980

File tree

4 files changed

+68
-40
lines changed

4 files changed

+68
-40
lines changed

api/cache.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ def clear_spec_cache(spec_id: str) -> int:
149149
count += clear_cache_by_pattern("specs_list") # List might have changed
150150
count += clear_cache_by_pattern("filter:") # Filters might be affected
151151
count += clear_cache_by_pattern("stats") # Stats might have changed
152+
count += clear_cache_by_pattern("sitemap") # Sitemap includes spec URLs
153+
count += clear_cache_by_pattern(f"seo:{spec_id}") # SEO proxy pages for this spec
154+
count += clear_cache_by_pattern(f"og:{spec_id}") # OG images for this spec
152155
return count
153156

154157

@@ -172,6 +175,7 @@ def clear_library_cache(library_id: str) -> int:
172175
count += clear_cache_by_pattern("libraries") # List might have changed
173176
count += clear_cache_by_pattern("filter:") # Filters might be affected
174177
count += clear_cache_by_pattern("stats") # Stats might have changed
178+
count += clear_cache_by_pattern("sitemap") # Sitemap includes library URLs
175179
return count
176180

177181

api/main.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,18 @@ async def add_cache_headers(request: Request, call_next):
116116

117117
path = request.url.path
118118

119-
# Static data that rarely changes (5 min cache, stale-while-revalidate for 1 hour)
119+
# Static data changes only on deploy (10 min cache, 1h stale-while-revalidate)
120120
if path in ("/libraries", "/stats"):
121-
response.headers["Cache-Control"] = "public, max-age=300, stale-while-revalidate=3600"
122-
# Specs list - moderate caching (2 min cache)
121+
response.headers["Cache-Control"] = "public, max-age=600, stale-while-revalidate=3600"
122+
# Specs list (5 min cache, 1h stale-while-revalidate)
123123
elif path == "/specs":
124-
response.headers["Cache-Control"] = "public, max-age=120, stale-while-revalidate=600"
125-
# Filter endpoint - short cache (30 sec) with stale-while-revalidate
124+
response.headers["Cache-Control"] = "public, max-age=300, stale-while-revalidate=3600"
125+
# Filter endpoint — most dynamic, moderate cache (1 min, 10 min stale)
126126
elif path == "/plots/filter":
127-
response.headers["Cache-Control"] = "public, max-age=30, stale-while-revalidate=300"
128-
# Individual spec details
127+
response.headers["Cache-Control"] = "public, max-age=60, stale-while-revalidate=600"
128+
# Individual spec details (5 min cache, 1h stale-while-revalidate)
129129
elif path.startswith("/specs/"):
130-
response.headers["Cache-Control"] = "public, max-age=120, stale-while-revalidate=600"
130+
response.headers["Cache-Control"] = "public, max-age=300, stale-while-revalidate=3600"
131131

132132
return response
133133

api/routers/seo.py

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
from fastapi.responses import HTMLResponse, Response
88
from sqlalchemy.ext.asyncio import AsyncSession
99

10-
from api.cache import cache_key, get_cache, set_cache
10+
from api.cache import cache_key, get_cache, get_or_set_cache, set_cache
1111
from api.dependencies import optional_db
12+
from core.config import settings
1213
from core.database import SpecRepository
14+
from core.database.connection import get_db_context
1315

1416

1517
router = APIRouter(tags=["seo"])
@@ -20,6 +22,42 @@ def _lastmod(dt: datetime | None) -> str:
2022
return f"<lastmod>{dt.strftime('%Y-%m-%d')}</lastmod>" if dt else ""
2123

2224

25+
def _build_sitemap_xml(specs: list, base_url: str = "https://pyplots.ai") -> str:
    """Build the sitemap XML document as a single string.

    The static pages (root, catalog, mcp, legal) are always listed. Every spec
    that has at least one implementation contributes its overview URL followed
    by one URL per implementation; specs without implementations are skipped.

    Args:
        specs: Spec objects exposing ``id``, ``updated`` and ``impls``; each
            impl exposes ``library_id`` and ``updated``.
        base_url: Site origin used as the prefix of every ``<loc>`` entry. A
            trailing slash is ignored. Defaults to the production origin, so
            existing callers get the same output as before.

    Returns:
        The sitemap lines joined with newlines (no trailing newline).
    """
    # Escape once up front so an origin containing "&" cannot break the XML.
    origin = html.escape(base_url.rstrip("/"))

    xml_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        f" <url><loc>{origin}/</loc></url>",
    ]
    xml_lines.extend(f" <url><loc>{origin}/{page}</loc></url>" for page in ("catalog", "mcp", "legal"))

    for spec in specs:
        # Only specs with implementations have pages worth indexing.
        if not spec.impls:
            continue
        spec_url = f"{origin}/{html.escape(spec.id)}"
        # Overview page first, then one entry per implementation.
        xml_lines.append(f" <url><loc>{spec_url}</loc>{_lastmod(spec.updated)}</url>")
        for impl in spec.impls:
            library_id = html.escape(impl.library_id)
            xml_lines.append(f" <url><loc>{spec_url}/{library_id}</loc>{_lastmod(impl.updated)}</url>")

    xml_lines.append("</urlset>")
    return "\n".join(xml_lines)
48+
49+
50+
# Sitemap listing only the static pages (built from an empty spec list);
# get_sitemap returns it as-is when no database session is available.
_STATIC_SITEMAP = _build_sitemap_xml([])
51+
52+
53+
async def _refresh_sitemap() -> str:
    """Rebuild the sitemap XML using a database session opened here.

    Takes no arguments and opens its own session via ``get_db_context``, so it
    can be handed to the cache layer as a refresh factory that runs without a
    request-scoped session.

    Returns:
        The sitemap XML built from all specs returned by the repository.
    """
    async with get_db_context() as session:
        all_specs = await SpecRepository(session).get_all()
        # Build while the session is still open, in case spec attributes are
        # loaded lazily -- NOTE(review): confirm against SpecRepository.get_all.
        return _build_sitemap_xml(all_specs)
59+
60+
2361
# Minimal HTML template for social media bots (meta tags are what matters)
2462
BOT_HTML_TEMPLATE = """<!DOCTYPE html>
2563
<html lang="en">
@@ -66,41 +104,17 @@ async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
66104
67105
Includes root, catalog page, and all specs with implementations.
68106
"""
69-
key = cache_key("sitemap_xml")
70-
cached = get_cache(key)
71-
if cached:
72-
return Response(content=cached, media_type="application/xml")
73-
74-
# Build XML lines
75-
xml_lines = [
76-
'<?xml version="1.0" encoding="UTF-8"?>',
77-
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
78-
" <url><loc>https://pyplots.ai/</loc></url>",
79-
" <url><loc>https://pyplots.ai/catalog</loc></url>",
80-
" <url><loc>https://pyplots.ai/mcp</loc></url>",
81-
" <url><loc>https://pyplots.ai/legal</loc></url>",
82-
]
107+
if db is None:
108+
return Response(content=_STATIC_SITEMAP, media_type="application/xml")
83109

84-
# Add spec URLs (overview + all implementations)
85-
if db is not None:
110+
async def _fetch() -> str:
86111
repo = SpecRepository(db)
87112
specs = await repo.get_all()
88-
for spec in specs:
89-
if spec.impls: # Only include specs with implementations
90-
spec_id = html.escape(spec.id)
91-
# Overview page
92-
xml_lines.append(f" <url><loc>https://pyplots.ai/{spec_id}</loc>{_lastmod(spec.updated)}</url>")
93-
# Individual implementation pages
94-
for impl in spec.impls:
95-
library_id = html.escape(impl.library_id)
96-
xml_lines.append(
97-
f" <url><loc>https://pyplots.ai/{spec_id}/{library_id}</loc>{_lastmod(impl.updated)}</url>"
98-
)
99-
100-
xml_lines.append("</urlset>")
101-
xml = "\n".join(xml_lines)
113+
return _build_sitemap_xml(specs)
102114

103-
set_cache(key, xml)
115+
xml = await get_or_set_cache(
116+
cache_key("sitemap_xml"), _fetch, refresh_after=settings.cache_refresh_after, refresh_factory=_refresh_sitemap
117+
)
104118
return Response(content=xml, media_type="application/xml")
105119

106120

tests/unit/api/test_cache.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,10 @@ def test_clear_spec_entries(self) -> None:
155155
set_cache("specs_list", "all specs")
156156
set_cache("filter:lib=matplotlib", "filter result")
157157
set_cache("stats", "stats data")
158+
set_cache("sitemap_xml", "sitemap")
159+
set_cache("seo:scatter-basic", "seo overview")
160+
set_cache("seo:scatter-basic:matplotlib", "seo impl")
161+
set_cache("og:scatter-basic:matplotlib", "og image")
158162
set_cache("unrelated:key", "unrelated")
159163

160164
# Clear spec cache
@@ -167,6 +171,10 @@ def test_clear_spec_entries(self) -> None:
167171
assert get_cache("specs_list") is None
168172
assert get_cache("filter:lib=matplotlib") is None
169173
assert get_cache("stats") is None
174+
assert get_cache("sitemap_xml") is None
175+
assert get_cache("seo:scatter-basic") is None
176+
assert get_cache("seo:scatter-basic:matplotlib") is None
177+
assert get_cache("og:scatter-basic:matplotlib") is None
170178

171179
# Unrelated should still be there
172180
assert get_cache("unrelated:key") is not None
@@ -191,6 +199,7 @@ def test_clear_library_entries(self) -> None:
191199
set_cache("libraries", "all libraries")
192200
set_cache("filter:lib=matplotlib", "filter result")
193201
set_cache("stats", "stats data")
202+
set_cache("sitemap_xml", "sitemap")
194203
set_cache("unrelated:key", "unrelated")
195204

196205
# Clear library cache
@@ -202,6 +211,7 @@ def test_clear_library_entries(self) -> None:
202211
assert get_cache("libraries") is None
203212
assert get_cache("filter:lib=matplotlib") is None
204213
assert get_cache("stats") is None
214+
assert get_cache("sitemap_xml") is None
205215

206216
# Unrelated should still be there
207217
assert get_cache("unrelated:key") is not None

0 commit comments

Comments
 (0)