Skip to content

Commit 3739137

Browse files
Copilot and argyleink authored
[WIP] Add robots.TXT file for AI agents (#42)
* Initial plan
* Enhance robots.txt to welcome and inform AI agents with comprehensive resource links
  Co-authored-by: argyleink <1134620+argyleink@users.noreply.github.com>
* Refactor: export getRobotsTxt function and update tests to use actual implementation
  Co-authored-by: argyleink <1134620+argyleink@users.noreply.github.com>
* Refactor tests: use beforeEach to reduce duplication in test setup
  Co-authored-by: argyleink <1134620+argyleink@users.noreply.github.com>
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: argyleink <1134620+argyleink@users.noreply.github.com>
1 parent 0234ac2 commit 3739137

2 files changed

Lines changed: 66 additions & 2 deletions

File tree

src/pages/robots.txt.ts

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,33 @@
11
import type { APIRoute } from 'astro';
22

3-
const getRobotsTxt = (sitemapURL: URL) => `
3+
/**
 * Builds the robots.txt body served by this site.
 *
 * The text allows every user agent to crawl everything, points at the
 * sitemap, and lists (as comments) the LLM-oriented resources hosted under
 * the site's origin.
 *
 * @param sitemapURL - Absolute sitemap URL; its `href` is written after `Sitemap:`.
 * @param siteURL - Site URL; only its `origin` is used to prefix the resource links.
 * @returns The robots.txt text, beginning and ending with a newline.
 */
export const getRobotsTxt = (sitemapURL: URL, siteURL: URL): string => {
  const origin = siteURL.origin;

  const lines = [
    // Leading empty entry reproduces the newline that opens the document.
    '',
    '# Welcome AI agents and crawlers!',
    '# This is a podcast website built with Starpod - all content is freely accessible.',
    '',
    'User-agent: *',
    'Allow: /',
    '',
    '# Sitemap for all pages',
    `Sitemap: ${sitemapURL.href}`,
    '',
    '# Special resources for AI/LLM agents:',
    `# - ${origin}/llms.txt - Structured overview following the llms.txt spec`,
    `# - ${origin}/for-llms - Human-readable guide for AI assistants`,
    `# - ${origin}/episodes-index.html.md - Complete episode listing in markdown`,
    `# - ${origin}/[episode-slug].html.md - Individual episodes with transcripts`,
    '#',
    '# All content includes:',
    '# - Podcast metadata (hosts, description, platforms)',
    '# - Episode information (titles, descriptions, publish dates)',
    '# - Full transcripts (when available)',
    '# - Guest information',
    '#',
    '# Feel free to crawl, index, and use this content to help users discover',
    '# and learn about our podcast!',
    // Trailing empty entry reproduces the final newline.
    '',
  ];

  return lines.join('\n');
};
928

1029
/**
 * Route handler that responds with the generated robots.txt text.
 *
 * @param context - Route context; only its `site` URL is read.
 * @returns A `Response` whose body is the output of `getRobotsTxt`.
 * @throws Error when `site` is undefined. Absolute sitemap and resource URLs
 *   cannot be built without it, so fail with an actionable message instead of
 *   the opaque "Invalid URL" error `new URL` would raise.
 */
export const GET: APIRoute = ({ site }) => {
  // Guard once instead of asserting with `!`: this narrows `site` to `URL`
  // for the rest of the handler and makes the failure mode explicit.
  if (!site) {
    throw new Error(
      'Cannot generate robots.txt: `site` is undefined. Set the `site` option in the Astro config.',
    );
  }

  const sitemapURL = new URL('sitemap-index.xml', site);
  return new Response(getRobotsTxt(sitemapURL, site));
};

tests/unit/robots.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { describe, expect, it, beforeEach } from 'vitest';
2+
import { getRobotsTxt } from '../../src/pages/robots.txt';
3+
4+
describe('Robots.txt', () => {
  // Regenerated before every test so each case reads freshly built output.
  let output = '';

  /** Asserts, in order, that each snippet occurs in the generated robots.txt. */
  const expectSnippets = (...snippets: string[]): void => {
    for (const snippet of snippets) {
      expect(output).toContain(snippet);
    }
  };

  beforeEach(() => {
    const site = new URL('https://whiskey.fm');
    const sitemap = new URL('sitemap-index.xml', site);
    output = getRobotsTxt(sitemap, site);
  });

  describe('Generated content', () => {
    it('should contain welcoming comment for AI agents', () => {
      expectSnippets('Welcome AI agents and crawlers', 'User-agent: *', 'Allow: /');
    });

    it('should include sitemap reference', () => {
      expectSnippets('Sitemap: https://whiskey.fm/sitemap-index.xml');
    });

    it('should reference LLM-specific resources', () => {
      expectSnippets('/llms.txt', '/for-llms', '/episodes-index.html.md');
    });

    it('should describe available content types', () => {
      expectSnippets(
        'Podcast metadata',
        'Episode information',
        'Full transcripts',
        'Guest information',
      );
    });

    it('should include encouraging closing message', () => {
      expectSnippets('Feel free to crawl, index, and use this content', 'help users discover');
    });
  });
});

0 commit comments

Comments
 (0)