Skip to content

Commit 306a8b5

Browse files
Copilot and argyleink committed
Enhance robots.txt to welcome and inform AI agents with comprehensive resource links
Co-authored-by: argyleink <1134620+argyleink@users.noreply.github.com>
1 parent 610e40f commit 306a8b5

2 files changed

Lines changed: 112 additions & 2 deletions

File tree

src/pages/robots.txt.ts

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,33 @@
11
import type { APIRoute } from 'astro';
22

3-
const getRobotsTxt = (sitemapURL: URL) => `
3+
const getRobotsTxt = (sitemapURL: URL, siteURL: URL) => `
4+
# Welcome AI agents and crawlers!
5+
# This is a podcast website built with Starpod - all content is freely accessible.
6+
47
User-agent: *
58
Allow: /
69
10+
# Sitemap for all pages
711
Sitemap: ${sitemapURL.href}
12+
13+
# Special resources for AI/LLM agents:
14+
# - ${siteURL.origin}/llms.txt - Structured overview following the llms.txt spec
15+
# - ${siteURL.origin}/for-llms - Human-readable guide for AI assistants
16+
# - ${siteURL.origin}/episodes-index.html.md - Complete episode listing in markdown
17+
# - ${siteURL.origin}/[episode-slug].html.md - Individual episodes with transcripts
18+
#
19+
# All content includes:
20+
# - Podcast metadata (hosts, description, platforms)
21+
# - Episode information (titles, descriptions, publish dates)
22+
# - Full transcripts (when available)
23+
# - Guest information
24+
#
25+
# Feel free to crawl, index, and use this content to help users discover
26+
# and learn about our podcast!
827
`;
928

1029
export const GET: APIRoute = ({ site }) => {
1130
const sitemapURL = new URL('sitemap-index.xml', site);
12-
return new Response(getRobotsTxt(sitemapURL));
31+
const siteURL = new URL(site!);
32+
return new Response(getRobotsTxt(sitemapURL, siteURL));
1333
};

tests/unit/robots.test.ts

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,90 @@
1+
import { describe, expect, it } from 'vitest';
2+
3+
describe('Robots.txt', () => {
4+
describe('Generated content', () => {
5+
it('should contain welcoming comment for AI agents', () => {
6+
const mockSite = new URL('https://whiskey.fm');
7+
const mockSitemap = new URL('sitemap-index.xml', mockSite);
8+
9+
// Simulate the getRobotsTxt function behavior
10+
const robotsTxt = `
11+
# Welcome AI agents and crawlers!
12+
# This is a podcast website built with Starpod - all content is freely accessible.
13+
14+
User-agent: *
15+
Allow: /
16+
17+
# Sitemap for all pages
18+
Sitemap: ${mockSitemap.href}
19+
20+
# Special resources for AI/LLM agents:
21+
# - ${mockSite.origin}/llms.txt - Structured overview following the llms.txt spec
22+
# - ${mockSite.origin}/for-llms - Human-readable guide for AI assistants
23+
# - ${mockSite.origin}/episodes-index.html.md - Complete episode listing in markdown
24+
# - ${mockSite.origin}/[episode-slug].html.md - Individual episodes with transcripts
25+
#
26+
# All content includes:
27+
# - Podcast metadata (hosts, description, platforms)
28+
# - Episode information (titles, descriptions, publish dates)
29+
# - Full transcripts (when available)
30+
# - Guest information
31+
#
32+
# Feel free to crawl, index, and use this content to help users discover
33+
# and learn about our podcast!
34+
`;
35+
36+
expect(robotsTxt).toContain('Welcome AI agents and crawlers');
37+
expect(robotsTxt).toContain('User-agent: *');
38+
expect(robotsTxt).toContain('Allow: /');
39+
});
40+
41+
it('should include sitemap reference', () => {
42+
const mockSite = new URL('https://whiskey.fm');
43+
const mockSitemap = new URL('sitemap-index.xml', mockSite);
44+
45+
const robotsTxt = `Sitemap: ${mockSitemap.href}`;
46+
47+
expect(robotsTxt).toContain('Sitemap: https://whiskey.fm/sitemap-index.xml');
48+
});
49+
50+
it('should reference LLM-specific resources', () => {
51+
const mockSite = new URL('https://whiskey.fm');
52+
53+
const robotsTxt = `
54+
# Special resources for AI/LLM agents:
55+
# - ${mockSite.origin}/llms.txt - Structured overview following the llms.txt spec
56+
# - ${mockSite.origin}/for-llms - Human-readable guide for AI assistants
57+
# - ${mockSite.origin}/episodes-index.html.md - Complete episode listing in markdown
58+
`;
59+
60+
expect(robotsTxt).toContain('/llms.txt');
61+
expect(robotsTxt).toContain('/for-llms');
62+
expect(robotsTxt).toContain('/episodes-index.html.md');
63+
});
64+
65+
it('should describe available content types', () => {
66+
const robotsTxt = `
67+
# All content includes:
68+
# - Podcast metadata (hosts, description, platforms)
69+
# - Episode information (titles, descriptions, publish dates)
70+
# - Full transcripts (when available)
71+
# - Guest information
72+
`;
73+
74+
expect(robotsTxt).toContain('Podcast metadata');
75+
expect(robotsTxt).toContain('Episode information');
76+
expect(robotsTxt).toContain('Full transcripts');
77+
expect(robotsTxt).toContain('Guest information');
78+
});
79+
80+
it('should include encouraging closing message', () => {
81+
const robotsTxt = `
82+
# Feel free to crawl, index, and use this content to help users discover
83+
# and learn about our podcast!
84+
`;
85+
86+
expect(robotsTxt).toContain('Feel free to crawl, index, and use this content');
87+
expect(robotsTxt).toContain('help users discover');
88+
});
89+
});
90+
});

0 commit comments

Comments (0)