-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathurl-fetch.js
More file actions
60 lines (53 loc) · 1.98 KB
/
url-fetch.js
File metadata and controls
60 lines (53 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
const fs = require('fs');
const path = require('path');
const { chromium } = require('playwright');
/**
 * Turns an arbitrary value (typically a URL) into a short, filesystem-friendly slug.
 *
 * The value is lowercased, every `http://` / `https://` occurrence is removed,
 * each run of characters outside `a-z0-9` collapses to a single hyphen, and the
 * result is limited to 80 characters with no leading or trailing hyphen.
 *
 * @param {*} value - Value to slugify; falsy values are treated as an empty string.
 * @returns {string} The slug, or `'page'` when nothing usable remains.
 */
function sanitizeFileToken(value) {
  const slug = String(value || '')
    .toLowerCase()
    .replace(/https?:\/\//g, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 80)
    // Truncation can cut right after a separator; trim again so the token
    // never ends with a dangling hyphen.
    .replace(/-+$/, '');
  return slug || 'page';
}
/**
 * Decides where the HTML snapshot for a URL is stored.
 *
 * @param {string} url - Source URL; its slug becomes part of the generated filename.
 * @param {string} [outputPath] - Path of the render output; `'engine/out/comic.png'` is used when falsy.
 * @param {string} [explicitPath] - When truthy, this path is resolved and returned as-is.
 * @returns {string} `explicitPath` resolved to an absolute path, or
 *   `<output dir>/<output name without extension>.snapshot.<slug>.html`.
 */
function buildSnapshotPath(url, outputPath, explicitPath) {
  if (!explicitPath) {
    const renderTarget = path.resolve(outputPath || 'engine/out/comic.png');
    const extension = path.extname(renderTarget);
    const stem = path.basename(renderTarget, extension);
    // Snapshot sits next to the render output and shares its base name.
    const fileName = [stem, 'snapshot', sanitizeFileToken(url), 'html'].join('.');
    return path.join(path.dirname(renderTarget), fileName);
  }
  return path.resolve(explicitPath);
}
/**
 * Loads a URL in headless Chromium and writes the page's HTML to disk.
 *
 * @param {string} url - Address to load; must start with `http://` or `https://`.
 * @param {string} snapshotPath - Destination file. It is resolved to an absolute
 *   path and its parent directory is created when missing.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=45000] - Navigation timeout in milliseconds.
 *   Values below 5000 are raised to 5000; non-numeric values fall back to 45000.
 * @param {string} [options.waitUntil='domcontentloaded'] - Passed to `page.goto`.
 * @returns {Promise<{snapshotPath: string, finalUrl: string, title: string}>}
 *   The absolute snapshot path, the page URL after navigation, and the trimmed title.
 * @throws {Error} When `url` does not start with `http://` or `https://`.
 */
async function fetchUrlToHtmlSnapshot(url, snapshotPath, options = {}) {
  const target = String(url || '').trim();
  if (!/^https?:\/\//i.test(target)) {
    throw new Error(`URL must start with http:// or https:// (got: ${target})`);
  }

  // The `= {}` default only covers `undefined`; tolerate an explicit `null` too.
  const settings = options || {};
  const defaultTimeoutMs = 45000;
  const requestedTimeoutMs = Number(settings.timeoutMs || defaultTimeoutMs);
  // A non-numeric timeoutMs converts to NaN, and Math.max(5000, NaN) is NaN;
  // use the default instead of handing NaN to the browser.
  const timeoutMs = Math.max(
    5000,
    Number.isNaN(requestedTimeoutMs) ? defaultTimeoutMs : requestedTimeoutMs
  );
  const waitUntil = String(settings.waitUntil || 'domcontentloaded');

  const resolvedSnapshotPath = path.resolve(snapshotPath);
  await fs.promises.mkdir(path.dirname(resolvedSnapshotPath), { recursive: true });

  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(target, { waitUntil, timeout: timeoutMs });

    // Try to let lazy content render without stalling too long.
    try {
      await page.waitForLoadState('networkidle', { timeout: Math.min(10000, timeoutMs) });
    } catch (_) {
      // Deliberate best-effort: if the wait fails, snapshot whatever is there.
    }

    const html = await page.content();
    await fs.promises.writeFile(resolvedSnapshotPath, html, 'utf8');
    const title = await page.title();

    return {
      snapshotPath: resolvedSnapshotPath,
      finalUrl: page.url(),
      title: String(title || '').trim()
    };
  } finally {
    // Always release the browser, including when navigation or the write fails.
    await browser.close();
  }
}
// Public API of this module: the slug helper, the snapshot path builder, and the page fetcher.
module.exports = {
sanitizeFileToken,
buildSnapshotPath,
fetchUrlToHtmlSnapshot
};