|
| 1 | +import { htmlToText } from 'html-to-text'; |
| 2 | + |
| 3 | +import type { Episode, Show } from './rss'; |
| 4 | +import type { StarpodConfig } from '../utils/config'; |
| 5 | +import { truncate } from '../utils/truncate'; |
| 6 | + |
/**
 * Format a duration given in seconds as `H:MM:SS` (when at least one hour)
 * or `M:SS` (otherwise).
 *
 * Fractional seconds are floored. Non-finite or negative input (for example
 * `NaN` from a missing feed field) is treated as zero so the result is always
 * a well-formed duration string.
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted duration, e.g. `1:01:01` or `1:05`.
 */
export function formatDuration(seconds: number): string {
  // Clamp garbage input instead of emitting strings like "NaN:NaN" or "-1:-30".
  const total = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;

  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = total % 60;

  const pad = (value: number): string => String(value).padStart(2, '0');

  if (hours > 0) {
    return `${hours}:${pad(minutes)}:${pad(secs)}`;
  }
  // The leading minutes field is intentionally not padded.
  return `${minutes}:${pad(secs)}`;
}
| 20 | + |
/**
 * Render a timestamp as a long-form US English date such as
 * `January 15, 2024`.
 *
 * No `timeZone` option is passed, so the calendar day is resolved in the
 * time zone of the runtime that executes this function.
 *
 * @param timestamp - Value accepted by the `Date` constructor (epoch milliseconds).
 * @returns The formatted date string.
 */
export function formatDate(timestamp: number): string {
  const longDate: Intl.DateTimeFormatOptions = {
    year: 'numeric',
    month: 'long',
    day: 'numeric'
  };

  return new Date(timestamp).toLocaleDateString('en-US', longDate);
}
| 32 | + |
/**
 * Clean a transcript by stripping timestamp markers and surplus whitespace.
 *
 * - Markers of the form `[MM:SS]` or `[HH:MM:SS]` (one- or two-digit leading
 *   field) are removed.
 * - A run of markers together with the spaces/tabs around it collapses to a
 *   single space when any whitespace was present, so removing an inline marker
 *   does not leave a doubled space or glue two words together.
 * - Every line is trimmed, and three or more consecutive newlines are reduced
 *   to a single blank line so paragraph breaks survive.
 *
 * @param markdown - Raw transcript text.
 * @returns The cleaned transcript, trimmed at both ends.
 */
export function cleanTranscript(markdown: string): string {
  // One optional leading space/tab, then one or more markers, each followed by
  // optional spaces/tabs. Newlines are never consumed, so line structure is kept.
  const timestampRun = /[ \t]?(?:\[\d{1,2}:\d{2}(?::\d{2})?\][ \t]*)+/g;

  const withoutTimestamps = markdown.replace(timestampRun, (run) =>
    // Markers themselves contain no whitespace, so any whitespace in the match
    // came from the surrounding text and must be preserved as one separator.
    /[ \t]/.test(run) ? ' ' : ''
  );

  return withoutTimestamps
    .split('\n')
    .map((line) => line.trim())
    .join('\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
| 49 | + |
/**
 * Shorten a description to the requested length.
 *
 * This is a thin wrapper that forwards both arguments unchanged to the shared
 * `truncate` utility. NOTE(review): the original comment states that utility
 * also performs HTML sanitization; that cannot be confirmed from this file.
 *
 * @param text - Text to shorten.
 * @param length - Length limit handed to `truncate`.
 * @returns Whatever `truncate` returns for the given arguments.
 */
export function truncateDescription(text: string, length: number): string {
  const shortened = truncate(text, length);
  return shortened;
}
| 57 | + |
/**
 * Convert an HTML fragment to text via `htmlToText`.
 *
 * Options passed to the converter:
 * - `wordwrap: false` and `preserveNewlines: true`
 * - `a` elements: `ignoreHref: false`, i.e. link targets are not dropped
 * - `img` elements: formatted with `skip`
 *
 * @param html - HTML source to convert.
 * @returns The converted text.
 */
export function htmlToMarkdown(html: string): string {
  const converted = htmlToText(html, {
    selectors: [
      { selector: 'a', options: { ignoreHref: false } },
      { selector: 'img', format: 'skip' }
    ],
    preserveNewlines: true,
    wordwrap: false
  });

  return converted;
}
| 71 | + |
/**
 * Build the body of the site's `llms.txt` file.
 *
 * Layout, in order: an H1 with the show title, the blurb as a block quote, the
 * long description, the host names, a "Main Documentation" link list, an
 * optional "Recent Episodes" list (omitted when `recentEpisodes` is empty) and
 * an "Optional" section with the RSS feed plus every configured platform link.
 *
 * @param show - Show metadata; only `title` is read.
 * @param recentEpisodes - Episodes to list; each description is cut to 150 via `truncateDescription`.
 * @param config - Site configuration (blurb, description, hosts, rssFeed, platforms).
 * @param siteUrl - Site URL; its `origin` prefixes internal links. When absent
 *   the prefix is empty and links start with `/`.
 * @returns The complete llms.txt content.
 */
export function generateLlmsTxt(
  show: Show,
  recentEpisodes: Episode[],
  config: StarpodConfig,
  siteUrl?: URL
): string {
  const origin = siteUrl?.origin || '';
  const hostList = config.hosts.map((host) => host.name).join(', ');

  // Chunks are concatenated verbatim at the end; each one carries its own newlines.
  const chunks: string[] = [
    `# ${show.title}\n\n`,
    `> ${config.blurb}\n\n`,
    `${config.description}\n\n`,
    `Hosted by: ${hostList}\n\n`,
    `## Main Documentation\n\n`,
    `- [About the Show](${origin}/about.html.md): Information about the podcast and hosts\n`,
    `- [For LLMs](${origin}/for-llms.html.md): Comprehensive guide for AI assistants\n`,
    `- [Episodes Index](${origin}/episodes-index.html.md): Complete list of all episodes\n\n`
  ];

  if (recentEpisodes.length > 0) {
    chunks.push(`## Recent Episodes\n\n`);
    for (const episode of recentEpisodes) {
      const link = `${origin}/${episode.episodeSlug}.html.md`;
      const summary = truncateDescription(episode.description, 150);
      chunks.push(`- [${episode.title}](${link}): ${summary}\n`);
    }
    chunks.push(`\n`);
  }

  chunks.push(`## Optional\n\n`);
  chunks.push(`- [RSS Feed](${config.rssFeed}): Direct access to podcast RSS\n`);

  // Only platforms with a configured (truthy) URL are listed, in this fixed order.
  const platformLinks = [
    { label: 'Listen on Apple Podcasts', url: config.platforms.apple },
    { label: 'Listen on Spotify', url: config.platforms.spotify },
    { label: 'Watch on YouTube', url: config.platforms.youtube },
    { label: 'Listen on Overcast', url: config.platforms.overcast },
    { label: 'Listen on Pocket Casts', url: config.platforms.pocketCasts }
  ];
  for (const { label, url } of platformLinks) {
    if (url) {
      chunks.push(`- [${label}](${url})\n`);
    }
  }

  return chunks.join('');
}
| 128 | + |
/**
 * Build the markdown document for one episode.
 *
 * Sections, in order: title heading, metadata lines (show, published date,
 * duration, episode number), hosts, description (the episode's HTML content
 * run through `htmlToMarkdown`), audio link and transcript. When no transcript
 * text is supplied (missing or empty), a fixed "not available" notice is
 * written instead.
 *
 * @param episode - Episode to render.
 * @param show - Show metadata; only `title` is read.
 * @param config - Site configuration; only `hosts` is read.
 * @param transcriptContent - Transcript text to embed as-is, if any.
 * @returns The episode markdown.
 */
export function generateEpisodeMarkdown(
  episode: Episode,
  show: Show,
  config: StarpodConfig,
  transcriptContent?: string
): string {
  const hostList = config.hosts.map((host) => host.name).join(', ');
  const body = htmlToMarkdown(episode.content);
  const transcript = transcriptContent
    ? transcriptContent
    : 'Transcript not available for this episode.';

  const sections: string[] = [
    `# Episode ${episode.episodeNumber}: ${episode.title}\n\n`,
    `**Show**: ${show.title}\n`,
    `**Published**: ${formatDate(episode.published)}\n`,
    `**Duration**: ${formatDuration(episode.duration)}\n`,
    `**Episode Number**: ${episode.episodeNumber}\n\n`,
    `## Hosts\n\n${hostList}\n\n`,
    `## Description\n\n${body}\n\n`,
    `## Audio\n\n[Download or listen to episode](${episode.audio.src})\n\n`,
    `## Transcript\n\n${transcript}\n`
  ];

  return sections.join('');
}
| 161 | + |
/**
 * Build the markdown index that lists every episode.
 *
 * The document opens with a title, the episode count and a horizontal rule.
 * Each episode then contributes a heading, published date, duration, a
 * description cut to 200 via `truncateDescription`, links to its detail page
 * and audio file, and a closing horizontal rule.
 *
 * @param show - Show metadata; only `title` is read.
 * @param episodes - Episodes to list, rendered in the order given.
 * @param siteUrl - Site URL; its `origin` prefixes detail-page links. When
 *   absent the prefix is empty and links start with `/`.
 * @returns The index markdown.
 */
export function generateEpisodesIndex(
  show: Show,
  episodes: Episode[],
  siteUrl?: URL
): string {
  const origin = siteUrl?.origin || '';

  const header =
    `# ${show.title} - Episodes\n\n` +
    `Complete listing of all ${episodes.length} episodes.\n\n` +
    `---\n\n`;

  const entries = episodes.map((episode) =>
    [
      `## Episode ${episode.episodeNumber}: ${episode.title}\n\n`,
      `**Published**: ${formatDate(episode.published)}\n`,
      `**Duration**: ${formatDuration(episode.duration)}\n`,
      `**Description**: ${truncateDescription(episode.description, 200)}\n\n`,
      `[Full episode details](${origin}/${episode.episodeSlug}.html.md) | [Audio](${episode.audio.src})\n\n`,
      `---\n\n`
    ].join('')
  );

  return header + entries.join('');
}
| 187 | + |
/**
 * Build the "Guide for AI Assistants" markdown page.
 *
 * Sections, in order: podcast overview, hosts (with optional links), episode
 * statistics, the first ten entries of `episodes` as "Recent Episodes",
 * transcript availability, listening platforms, RSS feed and a pointer to the
 * full episode index.
 *
 * Statistics:
 * - Average duration is the arithmetic mean of `duration` over all episodes.
 * - Publishing frequency is the number of intervals between episodes
 *   (`episodes.length - 1`) divided by the span between the earliest and
 *   latest `published` timestamps, scaled to one week. The line is omitted
 *   when there are fewer than two episodes or the span is not a positive,
 *   finite number, so the page never shows `Infinity`, `NaN` or a negative
 *   rate.
 *
 * @param show - Show metadata; only `title` is read.
 * @param episodes - All episodes. The first ten are listed as recent, so the
 *   caller is expected to pass them newest-first; the frequency calculation
 *   itself does not depend on the order.
 * @param config - Site configuration (blurb, description, hosts, platforms, rssFeed).
 * @param siteUrl - Site URL; its `origin` prefixes internal links. When absent
 *   the prefix is empty and links start with `/`.
 * @returns The page markdown.
 */
export function generateForLlmsMarkdown(
  show: Show,
  episodes: Episode[],
  config: StarpodConfig,
  siteUrl?: URL
): string {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const baseUrl = siteUrl?.origin || '';

  let markdown = `# ${show.title} - Guide for AI Assistants\n\n`;
  markdown += `## Podcast Overview\n\n`;
  markdown += `**Tagline**: ${config.blurb}\n\n`;
  markdown += `${config.description}\n\n`;

  markdown += `## Hosts\n\n`;
  for (const host of config.hosts) {
    markdown += `### ${host.name}\n\n`;
    markdown += `${host.bio}\n\n`;

    const links: string[] = [];
    if (host.website) links.push(`[Website](${host.website})`);
    if (host.github) links.push(`[GitHub](${host.github})`);
    if (host.twitter) links.push(`[Twitter](${host.twitter})`);
    if (links.length > 0) {
      markdown += `**Links**: ${links.join(' | ')}\n\n`;
    }
  }

  markdown += `## Episode Information\n\n`;
  markdown += `- **Total Episodes**: ${episodes.length}\n`;

  if (episodes.length > 0) {
    const totalDuration = episodes.reduce((sum, ep) => sum + ep.duration, 0);
    const avgDuration = totalDuration / episodes.length;
    markdown += `- **Average Episode Duration**: ${formatDuration(avgDuration)}\n`;

    if (episodes.length > 1) {
      // Scan for the real earliest/latest timestamps rather than trusting the
      // first and last array entries, so an unsorted list cannot yield a
      // negative span.
      let oldest = Infinity;
      let newest = -Infinity;
      for (const { published } of episodes) {
        if (published < oldest) oldest = published;
        if (published > newest) newest = published;
      }

      const daysBetween = (newest - oldest) / MS_PER_DAY;
      // A zero span (identical timestamps) would divide by zero; a non-finite
      // span means no usable timestamps were found.
      if (Number.isFinite(daysBetween) && daysBetween > 0) {
        // N episodes bound N - 1 publishing intervals.
        const episodesPerWeek = ((episodes.length - 1) / daysBetween) * 7;
        markdown += `- **Publishing Frequency**: Approximately ${episodesPerWeek.toFixed(1)} episodes per week\n`;
      }
    }
  }

  markdown += `\n## Recent Episodes\n\n`;
  for (const episode of episodes.slice(0, 10)) {
    markdown += `- **Episode ${episode.episodeNumber}**: [${episode.title}](${baseUrl}/${episode.episodeSlug}.html.md) - ${formatDate(episode.published)}\n`;
  }

  markdown += `\n## Transcript Availability\n\n`;
  markdown += `Transcripts are available for many episodes and are included in the individual episode markdown files. `;
  markdown += `Access any episode at \`/{episode-slug}.html.md\` to view the full transcript if available.\n\n`;

  markdown += `## How to Listen\n\n`;
  // Only platforms with a configured (truthy) URL are listed, in this fixed order.
  const platformLinks = [
    { label: 'Apple Podcasts', url: config.platforms.apple },
    { label: 'Spotify', url: config.platforms.spotify },
    { label: 'YouTube', url: config.platforms.youtube },
    { label: 'Overcast', url: config.platforms.overcast },
    { label: 'Pocket Casts', url: config.platforms.pocketCasts }
  ];
  for (const { label, url } of platformLinks) {
    if (url) {
      markdown += `- [${label}](${url})\n`;
    }
  }

  markdown += `\n## RSS Feed\n\n`;
  markdown += `Direct RSS feed access: ${config.rssFeed}\n\n`;

  markdown += `## Complete Episode List\n\n`;
  markdown += `For a complete list of all episodes with descriptions, see [Episodes Index](${baseUrl}/episodes-index.html.md).\n`;

  return markdown;
}
| 272 | + |
/**
 * Build the "About" page markdown.
 *
 * The page contains the show title and description, one subsection per host
 * (name, bio and a bullet list of whichever of Twitter, GitHub and Website are
 * set, in that order), and a list of the configured listening platforms.
 *
 * @param show - Show metadata; only `title` is read.
 * @param config - Site configuration (description, hosts, platforms).
 * @returns The page markdown.
 */
export function generateAboutMarkdown(
  show: Show,
  config: StarpodConfig
): string {
  // Chunks are concatenated verbatim at the end; each one carries its own newlines.
  const pieces: string[] = [
    `# About ${show.title}\n\n`,
    `${config.description}\n\n`,
    `## Meet the Hosts\n\n`
  ];

  for (const host of config.hosts) {
    pieces.push(`### ${host.name}\n\n`, `${host.bio}\n\n`);

    const contactLines = [
      host.twitter ? `- Twitter: ${host.twitter}\n` : '',
      host.github ? `- GitHub: ${host.github}\n` : '',
      host.website ? `- Website: ${host.website}\n` : ''
    ].join('');

    // A blank separator line follows the list only when at least one link exists.
    if (contactLines !== '') {
      pieces.push(contactLines, `\n`);
    }
  }

  pieces.push(`## Listen to the Show\n\n`);

  // Only platforms with a configured (truthy) URL are listed, in this fixed order.
  const platformLinks = [
    { label: 'Apple Podcasts', url: config.platforms.apple },
    { label: 'Spotify', url: config.platforms.spotify },
    { label: 'YouTube', url: config.platforms.youtube },
    { label: 'Overcast', url: config.platforms.overcast },
    { label: 'Pocket Casts', url: config.platforms.pocketCasts }
  ];
  for (const { label, url } of platformLinks) {
    if (url) {
      pieces.push(`- [${label}](${url})\n`);
    }
  }

  return pieces.join('');
}
0 commit comments