|
| 1 | +const fs = require('node:fs'); |
| 2 | +const puppeteer = require('puppeteer'); |
| 3 | + |
// Origin of the site to crawl; only pages under this site belong in the sitemap.
const BASE_URL = 'https://acode.app';
// URLs already handed to crawl(). Starts empty on purpose: crawl() adds every
// URL it processes (including BASE_URL). Note that `new Set('some string')`
// would iterate the string and store its individual characters, not the URL.
const visited = new Set();
// URLs to emit into sitemap.xml, in discovery order. Starts empty because
// crawl() pushes each URL it visits; pre-seeding BASE_URL would list it twice.
const sitemap = [];
| 7 | + |
/**
 * Recursively crawls `url` and every same-origin page reachable from it.
 *
 * Each page that is accepted is recorded (without its `#fragment`) in the
 * module-level `visited` set and appended to the module-level `sitemap` list
 * before it is loaded, so a page that fails to load is still listed.
 *
 * @param {string} url - Absolute URL to crawl. URLs that cannot be parsed,
 *   belong to a different origin than `BASE_URL`, or were already visited are
 *   skipped silently.
 * @param {object} browser - Puppeteer browser instance used to open pages.
 * @returns {Promise<void>} Resolves when `url` and all pages reachable from it
 *   have been processed. Page-level failures are logged, never thrown.
 */
async function crawl(url, browser) {
  let target;
  try {
    target = new URL(url);
  } catch {
    // Not a parsable absolute URL; nothing to crawl.
    return;
  }

  // Compare origins instead of string prefixes: a prefix test would accept
  // hosts such as "https://acode.app.evil.com" as part of the site.
  if (target.origin !== new URL(BASE_URL).origin) {
    return;
  }

  // "#fragment" variants point at the same document; collapse them so a page
  // is crawled and listed only once.
  target.hash = '';
  const pageUrl = target.href;

  if (visited.has(pageUrl)) {
    return;
  }

  visited.add(pageUrl);
  sitemap.push(pageUrl);
  console.info(`Crawling: ${pageUrl}`);

  let links = [];
  try {
    const page = await browser.newPage();
    try {
      await page.goto(pageUrl, { waitUntil: 'networkidle2' });
      // Runs inside the page. Relative hrefs are resolved against the document
      // base URL, and malformed hrefs are dropped instead of aborting the
      // whole extraction.
      links = await page.$$eval('a[href]', (anchors) =>
        anchors.flatMap((a) => {
          try {
            return [new URL(a.getAttribute('href'), document.baseURI).href];
          } catch {
            return [];
          }
        }),
      );
    } finally {
      // Always release the tab, even when navigation or extraction failed.
      await page.close();
    }
  } catch (error) {
    console.error(`Failed to crawl ${pageUrl}:`, error.message);
    return;
  }

  // Sequential on purpose: one open tab at a time. crawl() itself filters
  // out off-site and already-visited links.
  for (const link of links) {
    console.info(`Found link: ${link}`);
    await crawl(link, browser);
  }
}
| 34 | + |
/**
 * Escapes the five XML-reserved characters so a value can be placed safely
 * inside an XML text node such as `<loc>`.
 *
 * @param {string} value - Raw text.
 * @returns {string} The text with `& < > " '` replaced by entity references.
 */
function escapeXml(value) {
  // '&' must be replaced first so the entities added below are not re-escaped.
  return value
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&apos;');
}

/**
 * Launches a headless browser, crawls the site starting at `BASE_URL`, and
 * writes the collected URLs to `sitemap.xml` in the current working directory.
 *
 * @returns {Promise<void>} Resolves after the file is written and the browser
 *   is closed. Rejects if launching the browser or writing the file fails.
 */
async function generateSitemap() {
  const browser = await puppeteer.launch({
    downloadBehavior: {
      policy: 'deny',
    },
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
    headless: true,
    ignoreDefaultArgs: ['--enable-automation'],
  });

  try {
    await crawl(BASE_URL, browser);

    // A sitemap must not list the same location twice; drop duplicates while
    // keeping discovery order.
    const uniqueUrls = [...new Set(sitemap)];
    console.info(`Crawled ${uniqueUrls.length} pages.`);

    const entries = uniqueUrls
      .map((url) => `  <url><loc>${escapeXml(url)}</loc></url>`)
      .join('\n');
    const sitemapXml = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${entries}
</urlset>`;

    fs.writeFileSync('sitemap.xml', sitemapXml);
    process.stdout.write('Sitemap generated successfully!\n');
  } finally {
    // Always shut the browser down, even when crawling or writing failed.
    await browser.close();
  }
}
| 62 | + |
// Script entry point. Handle rejection explicitly so a failed run is reported
// and exits with a non-zero status instead of an unhandled promise rejection.
generateSitemap().catch((error) => {
  console.error('Sitemap generation failed:', error);
  process.exitCode = 1;
});
0 commit comments