Skip to content

Commit 045b40f

Browse files
Add LLM discovery support
1 parent 36e2486 commit 045b40f

9 files changed

Lines changed: 1248 additions & 0 deletions

File tree

README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,62 @@ see fit.
110110

111111
We use Turso and Astro DB to set up guests per episode. If you would also like to
112112
do this, you will need a Turso account.
113+
114+
### LLM Discovery Features
115+
116+
Starpod includes built-in support for LLM (Large Language Model) discovery
117+
through the [llms.txt specification](https://llmstxt.org/). This makes your
118+
podcast content easily discoverable and accessible to AI assistants like
119+
ChatGPT, Claude, and others.
120+
121+
#### What's Included
122+
123+
- `/llms.txt` - Structured file following the llms.txt spec that provides an
124+
overview of your podcast and links to detailed content
125+
- `/for-llms` - Human-readable guide page specifically designed for AI
126+
assistants
127+
- Markdown versions of all pages (`.html.md` endpoints) for clean, LLM-friendly
128+
content
129+
- Complete episode index with all episodes and descriptions at
130+
`/episodes-index.html.md`
131+
- Individual episode pages with full transcripts (if available) at
132+
`/{episode-slug}.html.md`
133+
134+
#### How LLMs Can Use Your Podcast
135+
136+
With these features automatically generated from your RSS feed and config, LLMs
137+
can:
138+
139+
- **Discover and recommend** specific episodes based on topics or themes
140+
- **Answer detailed questions** about episode content using full transcripts
141+
- **Summarize episodes** or extract key points and insights
142+
- **Find episodes** with specific guests or covering certain subjects
143+
- **Provide information** about your hosts, show format, and where to listen
144+
145+
#### Transcript Support
146+
147+
If you provide episode transcripts in
148+
`src/content/transcripts/[episode-number].md`, they will automatically be
149+
included in the LLM-accessible content. Transcripts are cleaned (timestamps
150+
removed) and formatted for optimal LLM consumption.
151+
152+
All transcript content is available at `/{episode-slug}.html.md` or
153+
`/{episode-number}.html.md`.
154+
155+
**Note:** Transcripts are optional. The LLM discovery features work perfectly
156+
fine without them, using episode descriptions and metadata from your RSS feed.
157+
158+
#### Generated Endpoints
159+
160+
All of the following endpoints are automatically generated at build time from
161+
your `starpod.config.ts` and RSS feed:
162+
163+
- `/llms.txt` - Main discovery file
164+
- `/for-llms` - Human-readable guide page
165+
- `/for-llms.html.md` - Markdown version of guide
166+
- `/about.html.md` - Markdown version of about page
167+
- `/episodes-index.html.md` - Complete episode listing
168+
- `/{episode-slug}.html.md` - Individual episode with transcript
169+
- `/{episode-number}.html.md` - Alternative episode URL
170+
171+
No configuration needed - it just works!

src/lib/llms.ts

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
import { htmlToText } from 'html-to-text';
2+
3+
import type { Episode, Show } from './rss';
4+
import type { StarpodConfig } from '../utils/config';
5+
import { truncate } from '../utils/truncate';
6+
7+
/**
 * Format a duration in seconds as `H:MM:SS` (one hour or longer) or `M:SS`
 * (under an hour).
 *
 * Fractional seconds are floored. Negative, `NaN`, or infinite input is
 * treated as zero so the result is always a well-formed duration instead of
 * text such as `NaN:NaN` or `-1:-5`.
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted duration, e.g. `1:01:01` or `2:05`.
 */
export function formatDuration(seconds: number): string {
  // Clamp unusable input up front; every later step assumes a whole,
  // non-negative number of seconds.
  const totalSeconds =
    Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;

  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const secs = totalSeconds % 60;
  const pad = (value: number): string => value.toString().padStart(2, '0');

  // The leading unit is never zero-padded; only the units after it are.
  return hours > 0
    ? `${hours}:${pad(minutes)}:${pad(secs)}`
    : `${minutes}:${pad(secs)}`;
}
20+
21+
/**
 * Render a timestamp as a long-form US English date, e.g. "January 15, 2024".
 *
 * @param timestamp - Value handed to the `Date` constructor.
 * @returns The result of `toLocaleDateString` for the `en-US` locale with a
 *   numeric year, long month name, and numeric day.
 */
export function formatDate(timestamp: number): string {
  const longDateOptions: Intl.DateTimeFormatOptions = {
    year: 'numeric',
    month: 'long',
    day: 'numeric'
  };

  return new Date(timestamp).toLocaleDateString('en-US', longDateOptions);
}
32+
33+
/**
 * Strip bracketed timestamps from a transcript and tidy its whitespace.
 *
 * Timestamps such as `[00:34]` or `[01:23:45]` are removed, every line is
 * trimmed, runs of three or more newlines are collapsed to a single blank
 * line, and the overall result is trimmed.
 *
 * @param markdown - Raw transcript text.
 * @returns The cleaned transcript.
 */
export function cleanTranscript(markdown: string): string {
  const timestampPattern = /\[\d{1,2}:\d{2}(?::\d{2})?\]/g;
  const withoutTimestamps = markdown.replace(timestampPattern, '');

  const trimmedLines: string[] = [];
  for (const rawLine of withoutTimestamps.split('\n')) {
    trimmedLines.push(rawLine.trim());
  }

  // At most one blank line is kept between paragraphs.
  const collapsed = trimmedLines.join('\n').replace(/\n{3,}/g, '\n\n');
  return collapsed.trim();
}
49+
50+
/**
 * Shorten a description to the requested length.
 *
 * Thin wrapper that delegates to the shared `truncate` utility imported from
 * `../utils/truncate`. NOTE(review): the previous comment stated that the
 * utility also handles HTML sanitization; that is not visible from this file.
 *
 * @param text - Description text to shorten.
 * @param length - Length argument forwarded to `truncate`.
 * @returns Whatever `truncate` returns for the given arguments.
 */
export function truncateDescription(text: string, length: number): string {
  const shortened = truncate(text, length);
  return shortened;
}
57+
58+
/**
 * Convert an HTML fragment to plain text using `htmlToText` from the
 * `html-to-text` package.
 *
 * The conversion is configured with word wrapping disabled, newlines
 * preserved, anchor hrefs kept (`ignoreHref: false`), and images skipped.
 *
 * @param html - HTML source to convert.
 * @returns The text produced by `htmlToText`.
 */
export function htmlToMarkdown(html: string): string {
  // Typed from the callee's signature so the literal values stay narrow.
  const conversionOptions: Parameters<typeof htmlToText>[1] = {
    wordwrap: false,
    preserveNewlines: true,
    selectors: [
      { selector: 'a', options: { ignoreHref: false } },
      { selector: 'img', format: 'skip' }
    ]
  };

  return htmlToText(html, conversionOptions);
}
71+
72+
/**
 * Build the body of the `llms.txt` file.
 *
 * The output contains, in order: a title heading, the blurb as a blockquote,
 * the description, the host names, a "Main Documentation" link list, a
 * "Recent Episodes" list (only when `recentEpisodes` is non-empty), and an
 * "Optional" section with the RSS feed plus any configured platform links.
 *
 * @param show - Show metadata; `title` is used for the heading.
 * @param recentEpisodes - Episodes to list under "Recent Episodes".
 * @param config - Site configuration supplying blurb, description, hosts,
 *   RSS feed URL, and platform links.
 * @param siteUrl - Optional site URL; its origin prefixes internal links.
 *   When omitted, links are emitted root-relative.
 * @returns The complete `llms.txt` content.
 */
export function generateLlmsTxt(
  show: Show,
  recentEpisodes: Episode[],
  config: StarpodConfig,
  siteUrl?: URL
): string {
  const baseUrl = siteUrl?.origin || '';
  const hostNames = config.hosts.map((host) => host.name).join(', ');

  const chunks: string[] = [
    `# ${show.title}\n\n`,
    `> ${config.blurb}\n\n`,
    `${config.description}\n\n`,
    `Hosted by: ${hostNames}\n\n`,
    `## Main Documentation\n\n`,
    `- [About the Show](${baseUrl}/about.html.md): Information about the podcast and hosts\n`,
    `- [For LLMs](${baseUrl}/for-llms.html.md): Comprehensive guide for AI assistants\n`,
    `- [Episodes Index](${baseUrl}/episodes-index.html.md): Complete list of all episodes\n\n`
  ];

  if (recentEpisodes.length > 0) {
    chunks.push(`## Recent Episodes\n\n`);
    chunks.push(
      ...recentEpisodes.map((episode) => {
        const episodeUrl = `${baseUrl}/${episode.episodeSlug}.html.md`;
        const description = truncateDescription(episode.description, 150);
        return `- [${episode.title}](${episodeUrl}): ${description}\n`;
      })
    );
    chunks.push(`\n`);
  }

  chunks.push(`## Optional\n\n`);
  chunks.push(`- [RSS Feed](${config.rssFeed}): Direct access to podcast RSS\n`);

  // Platforms appear in this fixed order; unset entries are skipped.
  const platformLinks = [
    { label: 'Listen on Apple Podcasts', url: config.platforms.apple },
    { label: 'Listen on Spotify', url: config.platforms.spotify },
    { label: 'Watch on YouTube', url: config.platforms.youtube },
    { label: 'Listen on Overcast', url: config.platforms.overcast },
    { label: 'Listen on Pocket Casts', url: config.platforms.pocketCasts }
  ];
  for (const { label, url } of platformLinks) {
    if (url) {
      chunks.push(`- [${label}](${url})\n`);
    }
  }

  return chunks.join('');
}
128+
129+
/**
 * Build the markdown document for a single episode.
 *
 * The document has a title heading, a metadata block (show, published date,
 * duration, episode number), then "Hosts", "Description", "Audio", and
 * "Transcript" sections. The description is `episode.content` run through
 * `htmlToMarkdown`. When no transcript is supplied, the transcript section
 * carries a fixed "not available" notice.
 *
 * @param episode - Episode to render.
 * @param show - Show metadata; `title` is used in the metadata block.
 * @param config - Site configuration; host names are listed under "Hosts".
 * @param transcriptContent - Optional transcript text to embed as-is.
 * @returns The episode markdown.
 */
export function generateEpisodeMarkdown(
  episode: Episode,
  show: Show,
  config: StarpodConfig,
  transcriptContent?: string
): string {
  const hostNames = config.hosts.map((host) => host.name).join(', ');
  const episodeContent = htmlToMarkdown(episode.content);

  const transcriptSection = transcriptContent
    ? `## Transcript\n\n${transcriptContent}\n`
    : `## Transcript\n\nTranscript not available for this episode.\n`;

  const sections: string[] = [
    `# Episode ${episode.episodeNumber}: ${episode.title}\n\n`,
    `**Show**: ${show.title}\n`,
    `**Published**: ${formatDate(episode.published)}\n`,
    `**Duration**: ${formatDuration(episode.duration)}\n`,
    `**Episode Number**: ${episode.episodeNumber}\n\n`,
    `## Hosts\n\n${hostNames}\n\n`,
    `## Description\n\n${episodeContent}\n\n`,
    `## Audio\n\n[Download or listen to episode](${episode.audio.src})\n\n`,
    transcriptSection
  ];

  return sections.join('');
}
161+
162+
/**
 * Build the markdown index that lists every episode.
 *
 * The document starts with a heading and the episode count, followed by one
 * entry per episode (heading, published date, duration, a description
 * truncated via `truncateDescription` with length 200, and links to the
 * episode markdown page and the audio file). Entries are separated by
 * horizontal rules.
 *
 * @param show - Show metadata; `title` is used in the heading.
 * @param episodes - Episodes to list, in the order given.
 * @param siteUrl - Optional site URL; its origin prefixes episode links.
 *   When omitted, links are emitted root-relative.
 * @returns The index markdown.
 */
export function generateEpisodesIndex(
  show: Show,
  episodes: Episode[],
  siteUrl?: URL
): string {
  const baseUrl = siteUrl?.origin || '';

  const header =
    `# ${show.title} - Episodes\n\n` +
    `Complete listing of all ${episodes.length} episodes.\n\n` +
    `---\n\n`;

  const entries = episodes.map((episode) =>
    [
      `## Episode ${episode.episodeNumber}: ${episode.title}\n\n`,
      `**Published**: ${formatDate(episode.published)}\n`,
      `**Duration**: ${formatDuration(episode.duration)}\n`,
      `**Description**: ${truncateDescription(episode.description, 200)}\n\n`,
      `[Full episode details](${baseUrl}/${episode.episodeSlug}.html.md) | [Audio](${episode.audio.src})\n\n`,
      `---\n\n`
    ].join('')
  );

  return header + entries.join('');
}
187+
188+
/**
 * Estimate how many episodes are published per week.
 *
 * The rate is the number of intervals between episodes (`count - 1`) divided
 * by the time between the earliest and latest `published` timestamps.
 * Dividing the episode count itself by that span would overstate the rate
 * (two episodes one week apart would read as two per week).
 *
 * @param episodes - Episodes to analyze, in any order.
 * @returns Episodes per week, or `undefined` when there are fewer than two
 *   episodes or the publishing span is not a positive, finite duration.
 */
function estimateEpisodesPerWeek(episodes: Episode[]): number | undefined {
  if (episodes.length < 2) {
    return undefined;
  }

  // Use min/max rather than first/last so the result does not depend on the
  // sort order of the list.
  let earliest = Infinity;
  let latest = -Infinity;
  for (const episode of episodes) {
    earliest = Math.min(earliest, episode.published);
    latest = Math.max(latest, episode.published);
  }

  const daysBetween = (latest - earliest) / (1000 * 60 * 60 * 24);
  if (!Number.isFinite(daysBetween) || daysBetween <= 0) {
    // A zero or unusable span would otherwise print "Infinity" or "NaN".
    return undefined;
  }

  return ((episodes.length - 1) / daysBetween) * 7;
}

/**
 * Build the markdown for the "for LLMs" guide page.
 *
 * Sections, in order: podcast overview, hosts (with optional links), episode
 * statistics (total count, average duration, and publishing frequency when it
 * can be estimated), the first ten episodes of `episodes` as "Recent
 * Episodes", transcript availability, listening platforms, RSS feed, and a
 * pointer to the full episode index.
 *
 * @param show - Show metadata; `title` is used in the heading.
 * @param episodes - All episodes; the first ten entries are listed as recent.
 * @param config - Site configuration supplying blurb, description, hosts,
 *   platform links, and RSS feed URL.
 * @param siteUrl - Optional site URL; its origin prefixes internal links.
 *   When omitted, links are emitted root-relative.
 * @returns The guide markdown.
 */
export function generateForLlmsMarkdown(
  show: Show,
  episodes: Episode[],
  config: StarpodConfig,
  siteUrl?: URL
): string {
  const baseUrl = siteUrl?.origin || '';

  let markdown = `# ${show.title} - Guide for AI Assistants\n\n`;
  markdown += `## Podcast Overview\n\n`;
  markdown += `**Tagline**: ${config.blurb}\n\n`;
  markdown += `${config.description}\n\n`;

  markdown += `## Hosts\n\n`;
  for (const host of config.hosts) {
    markdown += `### ${host.name}\n\n`;
    markdown += `${host.bio}\n\n`;

    const links: string[] = [];
    if (host.website) links.push(`[Website](${host.website})`);
    if (host.github) links.push(`[GitHub](${host.github})`);
    if (host.twitter) links.push(`[Twitter](${host.twitter})`);
    if (links.length > 0) {
      markdown += `**Links**: ${links.join(' | ')}\n\n`;
    }
  }

  markdown += `## Episode Information\n\n`;
  markdown += `- **Total Episodes**: ${episodes.length}\n`;

  if (episodes.length > 0) {
    const totalDuration = episodes.reduce((sum, ep) => sum + ep.duration, 0);
    const avgDuration = totalDuration / episodes.length;
    markdown += `- **Average Episode Duration**: ${formatDuration(avgDuration)}\n`;

    // The frequency line is omitted when no meaningful rate can be computed.
    const episodesPerWeek = estimateEpisodesPerWeek(episodes);
    if (episodesPerWeek !== undefined) {
      markdown += `- **Publishing Frequency**: Approximately ${episodesPerWeek.toFixed(1)} episodes per week\n`;
    }
  }

  markdown += `\n## Recent Episodes\n\n`;
  for (const episode of episodes.slice(0, 10)) {
    markdown += `- **Episode ${episode.episodeNumber}**: [${episode.title}](${baseUrl}/${episode.episodeSlug}.html.md) - ${formatDate(episode.published)}\n`;
  }

  markdown += `\n## Transcript Availability\n\n`;
  markdown += `Transcripts are available for many episodes and are included in the individual episode markdown files. `;
  markdown += `Access any episode at \`/{episode-slug}.html.md\` to view the full transcript if available.\n\n`;

  markdown += `## How to Listen\n\n`;
  // Platforms appear in this fixed order; unset entries are skipped.
  const platformLinks = [
    { label: 'Apple Podcasts', url: config.platforms.apple },
    { label: 'Spotify', url: config.platforms.spotify },
    { label: 'YouTube', url: config.platforms.youtube },
    { label: 'Overcast', url: config.platforms.overcast },
    { label: 'Pocket Casts', url: config.platforms.pocketCasts }
  ];
  for (const { label, url } of platformLinks) {
    if (url) {
      markdown += `- [${label}](${url})\n`;
    }
  }

  markdown += `\n## RSS Feed\n\n`;
  markdown += `Direct RSS feed access: ${config.rssFeed}\n\n`;

  markdown += `## Complete Episode List\n\n`;
  markdown += `For a complete list of all episodes with descriptions, see [Episodes Index](${baseUrl}/episodes-index.html.md).\n`;

  return markdown;
}
272+
273+
/**
 * Build the markdown version of the about page.
 *
 * The document has a title heading, the show description, a "Meet the Hosts"
 * section (name, bio, and any of Twitter / GitHub / Website as a bullet
 * list), and a "Listen to the Show" section with the configured platforms.
 *
 * @param show - Show metadata; `title` is used in the heading.
 * @param config - Site configuration supplying description, hosts, and
 *   platform links.
 * @returns The about page markdown.
 */
export function generateAboutMarkdown(
  show: Show,
  config: StarpodConfig
): string {
  const parts: string[] = [
    `# About ${show.title}\n\n`,
    `${config.description}\n\n`,
    `## Meet the Hosts\n\n`
  ];

  for (const host of config.hosts) {
    parts.push(`### ${host.name}\n\n`);
    parts.push(`${host.bio}\n\n`);

    // Contact bullets keep the Twitter, GitHub, Website order.
    const contacts = [
      { label: 'Twitter', value: host.twitter },
      { label: 'GitHub', value: host.github },
      { label: 'Website', value: host.website }
    ].filter((contact) => Boolean(contact.value));

    if (contacts.length > 0) {
      for (const contact of contacts) {
        parts.push(`- ${contact.label}: ${contact.value}\n`);
      }
      parts.push(`\n`);
    }
  }

  parts.push(`## Listen to the Show\n\n`);

  // Platforms appear in this fixed order; unset entries are skipped.
  const platformLinks = [
    { label: 'Apple Podcasts', url: config.platforms.apple },
    { label: 'Spotify', url: config.platforms.spotify },
    { label: 'YouTube', url: config.platforms.youtube },
    { label: 'Overcast', url: config.platforms.overcast },
    { label: 'Pocket Casts', url: config.platforms.pocketCasts }
  ];
  for (const { label, url } of platformLinks) {
    if (url) {
      parts.push(`- [${label}](${url})\n`);
    }
  }

  return parts.join('');
}

0 commit comments

Comments
 (0)