-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlaunch.mjs
More file actions
139 lines (127 loc) · 4.7 KB
/
launch.mjs
File metadata and controls
139 lines (127 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/**
* browser-bridge launcher — stealth Chromium + socat-fronted CDP.
*
* Steps in order:
* 1. Configure puppeteer-extra's stealth plugin with the full evasion
* set so navigator.webdriver, navigator.plugins, navigator.languages,
* WebGL vendor strings, etc. all look like a real Chrome session.
* 2. Spawn socat to forward 0.0.0.0:9222 -> 127.0.0.1:9223. Recent
* Chromium versions bind --remote-debugging-port to localhost
* regardless of --remote-debugging-address. socat is the simplest
* portable workaround.
* 3. puppeteer.launch(...) on port 9223 with realistic args.
* 4. Heartbeat log every 60s so log scrapers can confirm liveness; if the
* browser no longer responds, the process exits with status 1.
* 5. SIGTERM/SIGINT teardown.
*
* Env:
* PUPPETEER_EXECUTABLE_PATH — Chromium binary path. Default
* /usr/bin/chromium (Debian).
* HTTPS_PROXY / HTTP_PROXY — passed as --proxy-server when set.
* Used for VPN-fronted scraping.
*/
import { spawn } from 'node:child_process';
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Names of the stealth evasions this bridge switches on. They are added to
// the plugin explicitly so the enabled set does not depend on whatever the
// plugin enables out of the box.
// NOTE(review): recent plugin releases appear to enable every evasion by
// default, in which case the loop below is only a safeguard — confirm.
const ALL_EVASIONS = [
  'chrome.app',
  'chrome.csi',
  'chrome.loadTimes',
  'chrome.runtime',
  'defaultArgs',
  'iframe.contentWindow',
  'media.codecs',
  'navigator.hardwareConcurrency',
  'navigator.languages',
  'navigator.permissions',
  'navigator.plugins',
  'navigator.vendor',
  'navigator.webdriver',
  'sourceurl',
  'user-agent-override',
  'webgl.vendor',
  'window.outerdimensions',
];

// Create the plugin instance, enable each evasion by name, then register the
// plugin with puppeteer-extra before any browser is launched.
const stealth = StealthPlugin();
ALL_EVASIONS.forEach((evasionName) => {
  stealth.enabledEvasions.add(evasionName);
});
puppeteer.use(stealth);
// Chromium binary to launch. `||` (not `??`) is deliberate: an environment
// variable that is set but empty must fall back to the Debian default too,
// because an empty string is never a usable executable path.
const CHROME_PATH = process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium';
const INTERNAL_PORT = 9223; // chromium binds here (127.0.0.1)
const EXTERNAL_PORT = 9222; // socat exposes this on 0.0.0.0
// Forward the externally reachable port to Chromium's loopback-only CDP port.
// `fork` makes socat serve each incoming connection in a child process;
// stdio is inherited so socat's own diagnostics land in this process's logs.
// NOTE(review): the listener binds 0.0.0.0 — confirm the port is reachable
// only from trusted networks.
const socat = spawn('socat', [
  `TCP-LISTEN:${EXTERNAL_PORT},fork,reuseaddr,bind=0.0.0.0`,
  `TCP:127.0.0.1:${INTERNAL_PORT}`,
], { stdio: 'inherit' });

// 'error' is emitted when the child could not be spawned (e.g. socat is not
// installed) or could not be killed. Kept non-fatal, as before.
socat.on('error', (err) => {
  console.error('[browser-bridge] socat failed:', err.message);
});

// Without this listener a socat crash is silent: the external port stops
// forwarding while the rest of the bridge keeps running. Log only — the
// decision to exit stays with the existing teardown paths.
socat.on('exit', (code, signal) => {
  console.error(
    `[browser-bridge] socat exited (code=${code}, signal=${signal}); ` +
      `port ${EXTERNAL_PORT} is no longer forwarded`,
  );
});
// Flags that make Chromium runnable in this deployment and expose CDP.
const runtimeArgs = [
  // The sandbox is turned off because the process runs as a non-root user
  // inside a container; the host's unprivileged user namespaces are relied
  // on as the outer sandbox.
  '--no-sandbox',
  '--disable-setuid-sandbox',
  '--disable-dev-shm-usage',
  '--disable-gpu',
  '--disable-software-rasterizer',
  `--remote-debugging-port=${INTERNAL_PORT}`,
  '--remote-allow-origins=*',
  '--user-data-dir=/home/browser/data',
];

// Flags intended to make the browser fingerprint look like a regular
// desktop Chrome session.
// NOTE(review): --enable-webgl is combined with --disable-gpu and
// --disable-software-rasterizer above; confirm WebGL is actually available
// with that combination.
const fingerprintArgs = [
  '--window-size=1920,1080',
  '--lang=en-US,en',
  '--disable-blink-features=AutomationControlled',
  '--disable-features=IsolateOrigins,site-per-process',
  '--enable-features=NetworkService,NetworkServiceInProcess',
  '--enable-webgl',
  '--enable-accelerated-2d-canvas',
  '--font-render-hinting=medium',
];

// Final command line, in the same order as before. Left as a plain mutable
// array because later code may append to it.
const args = [...runtimeArgs, ...fingerprintArgs];
/**
 * Pick the proxy URL from the environment.
 *
 * HTTPS_PROXY wins over HTTP_PROXY. A variable that is set but empty counts
 * as unset, so an empty HTTPS_PROXY no longer hides a valid HTTP_PROXY.
 *
 * @param {Record<string, string | undefined>} [env] - environment to read.
 * @returns {string | undefined} the proxy URL, or undefined when none is set.
 */
function resolveProxy(env = process.env) {
  return env.HTTPS_PROXY || env.HTTP_PROXY || undefined;
}

/**
 * Mask the `user:password@` part of a proxy URL so it is safe to log.
 *
 * @param {string} proxyUrl - proxy URL, with or without a scheme.
 * @returns {string} the URL with any userinfo replaced by `***`; returned
 *   unchanged when it carries no userinfo.
 */
function redactProxyCredentials(proxyUrl) {
  // Optional scheme, then everything up to the last '@' before the first '/'.
  return proxyUrl.replace(/^([a-z][a-z0-9+.-]*:\/\/)?[^/]*@/i, '$1***@');
}

const proxy = resolveProxy();
if (proxy) {
  // Chromium receives the value exactly as configured; only the log line is
  // redacted.
  // NOTE(review): confirm Chromium honours credentials embedded in
  // --proxy-server if authenticated proxies are expected to work.
  args.push(`--proxy-server=${proxy}`);
  console.log(`[browser-bridge] routing through proxy: ${redactProxyCredentials(proxy)}`);
}
// Launch headless Chromium through puppeteer-extra (stealth plugin already
// registered). On failure the socat forwarder is stopped and the process
// exits with status 1.
console.log('[browser-bridge] launching stealth Chromium...');
let browser;
try {
  browser = await puppeteer.launch({
    headless: true,
    executablePath: CHROME_PATH,
    args,
    // Drop --enable-automation so navigator.webdriver isn't trivially
    // truthy — this is one of the highest-signal bot detectors.
    ignoreDefaultArgs: ['--enable-automation'],
    // This script installs its own SIGINT/SIGTERM teardown (graceful
    // browser close, then socat). Puppeteer's built-in handlers are
    // registered earlier and would act on the signal first, so they are
    // disabled to leave a single owner for those two signals. SIGHUP keeps
    // Puppeteer's default handling because the script does not handle it.
    handleSIGINT: false,
    handleSIGTERM: false,
  });
  console.log('[browser-bridge] stealth Chromium running');
  console.log(`[browser-bridge] CDP (internal): ws://127.0.0.1:${INTERNAL_PORT}/...`);
  console.log(`[browser-bridge] CDP (external): ws://0.0.0.0:${EXTERNAL_PORT}/... (via socat)`);
} catch (err) {
  console.error('[browser-bridge] failed to launch:', err.message);
  // Do not leave the forwarder listening when there is no browser behind it.
  socat.kill();
  process.exit(1);
}
/**
 * Tear the bridge down in response to a termination signal.
 *
 * Closes the browser (a failure to close is logged, not fatal), stops the
 * socat forwarder, then exits the process with status 0.
 *
 * @param {string} signal - name of the received signal; used in the log line.
 * @returns {Promise<void>}
 */
async function shutdown(signal) {
  console.log(`[browser-bridge] ${signal} received, shutting down...`);
  try {
    await browser.close();
  } catch (closeError) {
    console.error('[browser-bridge] error closing browser:', closeError.message);
  }
  socat.kill();
  process.exit(0);
}

// Same teardown for both signals; the signal name is passed along for logging.
for (const signalName of ['SIGTERM', 'SIGINT']) {
  process.on(signalName, () => shutdown(signalName));
}
/**
 * Report whether a Puppeteer browser handle still has a live connection.
 *
 * Reads the `connected` property when it is a boolean, otherwise calls
 * `isConnected()` when that method exists. When neither is available the
 * handle is assumed connected, so the caller's own probe decides.
 *
 * @param {object} instance - browser handle to inspect.
 * @returns {boolean} false only when the handle reports it is disconnected.
 */
function isBrowserConnected(instance) {
  if (typeof instance.connected === 'boolean') {
    return instance.connected;
  }
  if (typeof instance.isConnected === 'function') {
    return Boolean(instance.isConnected());
  }
  return true;
}

// Heartbeat — every 60s confirm the browser is still connected and log the
// number of open pages. If it crashed or detached, exit so the container
// restarts. The timer is unref'd so it never keeps the process alive by
// itself.
setInterval(async () => {
  let failure = null;
  if (!isBrowserConnected(browser)) {
    // Checked explicitly: browser.pages() may still resolve for targets that
    // were attached before the connection dropped.
    failure = 'CDP connection closed';
  } else {
    try {
      const pages = await browser.pages();
      console.log(`[browser-bridge] alive — ${pages.length} page(s) open`);
    } catch (err) {
      failure = err?.message ?? String(err);
    }
  }
  if (failure !== null) {
    console.error('[browser-bridge] heartbeat failed:', failure);
    console.error('[browser-bridge] browser disconnected, exiting');
    // Stop the forwarder like every other exit path does, so a forking socat
    // listener is not left holding the external port.
    socat.kill();
    process.exit(1);
  }
}, 60_000).unref();