Skip to content

Commit 155f2a1

Browse files
MarkusNeusinger, claude, and Copilot
authored
feat: block /debug route from search engine indexing (#3815)
Adds Disallow: /debug to robots.txt to prevent search engines from crawling and indexing the internal debug dashboard. This follows SEO best practices for internal admin/debug tools. --------- Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 1cf11e7 commit 155f2a1

File tree

6 files changed

+67
-1
lines changed

6 files changed

+67
-1
lines changed

api/routers/seo.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,17 @@
4242
DEFAULT_DESCRIPTION = "library-agnostic, ai-powered python plotting."
4343

4444

@router.get("/robots.txt")
async def get_robots():
    """Serve a robots.txt that blocks all crawlers from the API backend.

    APIs should not be indexed by search engines. Social media bots
    (WhatsApp, Twitter, etc.) are unaffected.
    """
    # Plain-text body: disallow every path for every user agent.
    body = "User-agent: *\nDisallow: /\n"
    return Response(content=body, media_type="text/plain")


4556
@router.get("/sitemap.xml")
4657
async def get_sitemap(db: AsyncSession | None = Depends(optional_db)):
4758
"""

app/public/robots.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -1,4 +1,5 @@
 User-agent: *
 Allow: /
+Disallow: /debug

 Sitemap: https://pyplots.ai/sitemap.xml
```

docs/reference/seo.md

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,34 @@ Uses **MonoLisa** variable font (commercial, not in repo):
210210
- Cached locally in `/tmp/pyplots-fonts/`
211211
- Fallback: DejaVuSansMono-Bold
212212

213+
## Robots.txt
214+
215+
### Frontend (pyplots.ai)
216+
217+
Static file at `app/public/robots.txt`:
218+
219+
```txt
220+
User-agent: *
221+
Allow: /
222+
Disallow: /debug
223+
224+
Sitemap: https://pyplots.ai/sitemap.xml
225+
```
226+
227+
### Backend (api.pyplots.ai)
228+
229+
Dynamic endpoint at `GET /robots.txt`:
230+
231+
```txt
232+
User-agent: *
233+
Disallow: /
234+
```
235+
236+
**Why block the API?**
237+
- APIs should not be indexed by search engines
238+
- Prevents crawling of debug endpoints, docs, and API responses
239+
- Social media bots (WhatsApp, Twitter, etc.) are unaffected - they fetch og:images directly
240+
213241
## Sitemap
214242

215243
Dynamic XML sitemap for search engine indexing.
@@ -277,7 +305,8 @@ curl -o test.png https://api.pyplots.ai/og/scatter-basic.png
277305
| File | Purpose |
278306
|------|---------|
279307
| `app/nginx.conf` | Bot detection, SPA routing, sitemap proxy |
280-
| `api/routers/seo.py` | SEO proxy endpoints, sitemap generation |
308+
| `app/public/robots.txt` | Frontend robots.txt (blocks /debug) |
309+
| `api/routers/seo.py` | SEO proxy endpoints, robots.txt, sitemap generation |
281310
| `api/routers/og_images.py` | Branded og:image endpoints |
282311
| `core/images.py` | Image processing, branding functions |
283312

tests/e2e/test_api_postgres.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ async def test_download_impl_not_found(self, client):
196196
class TestSeoEndpoints:
197197
"""E2E tests for SEO endpoints with real PostgreSQL."""
198198

199+
async def test_robots_txt(self, client):
200+
"""Should return robots.txt blocking crawlers from all routes."""
201+
response = await client.get("/robots.txt")
202+
assert response.status_code == 200
203+
assert response.headers["content-type"] == "text/plain; charset=utf-8"
204+
assert "User-agent: *" in response.text
205+
assert "Disallow: /" in response.text
206+
199207
async def test_sitemap(self, client):
200208
"""Should return sitemap XML with spec URLs."""
201209
response = await client.get("/sitemap.xml")

tests/integration/api/test_api_endpoints.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,14 @@ async def test_download_impl_not_found(self, client):
193193
class TestSeoEndpoints:
194194
"""Integration tests for SEO endpoints."""
195195

196+
async def test_robots_txt(self, client):
197+
"""Should return robots.txt blocking crawlers from all routes."""
198+
response = await client.get("/robots.txt")
199+
assert response.status_code == 200
200+
assert response.headers["content-type"] == "text/plain; charset=utf-8"
201+
assert "User-agent: *" in response.text
202+
assert "Disallow: /" in response.text
203+
196204
async def test_sitemap(self, client):
197205
"""Should return sitemap XML with spec URLs."""
198206
response = await client.get("/sitemap.xml")

tests/unit/api/test_routers.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,15 @@ def test_download_gcs_error(self, client: TestClient, mock_spec) -> None:
409409
class TestSeoRouter:
410410
"""Tests for SEO router."""
411411

412+
def test_robots_txt(self, client: TestClient) -> None:
413+
"""robots.txt should block crawlers from all routes."""
414+
response = client.get("/robots.txt")
415+
assert response.status_code == 200
416+
assert response.headers["content-type"] == "text/plain; charset=utf-8"
417+
content = response.text
418+
assert "User-agent: *" in content
419+
assert "Disallow: /" in content
420+
412421
def test_sitemap_structure(self, client: TestClient) -> None:
413422
"""Sitemap should return valid XML structure."""
414423
with patch(DB_CONFIG_PATCH, return_value=False):

0 commit comments

Comments
 (0)