Skip to content

Commit c7317ac

Browse files
authored
fix(helpers): terminate iframe workers after settle (#809)
1 parent f92682a commit c7317ac

2 files changed

Lines changed: 74 additions & 5 deletions

File tree

packages/metascraper-helpers/src/load-iframe/index.js

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,37 @@ const SCRIPT_PATH = (
1010

1111
module.exports = (url, $, { timeout = 5000 } = {}) => {
1212
const worker = new Worker(SCRIPT_PATH(), {
13-
workerData: { url, html: $.html(), timeout },
14-
stdout: true,
15-
stderr: true
13+
workerData: { url, html: $.html(), timeout }
1614
})
1715
const { promise, resolve, reject } = Promise.withResolvers()
18-
worker.on('message', html => resolve($.load(html || '')))
19-
worker.on('error', reject)
16+
let settled = false
17+
18+
const onMessage = html => {
19+
if (settled) return
20+
settled = true
21+
resolve($.load(html || ''))
22+
worker.terminate().catch(() => {})
23+
}
24+
25+
const onError = error => {
26+
if (settled) return
27+
settled = true
28+
reject(error)
29+
worker.terminate().catch(() => {})
30+
}
31+
32+
const onExit = code => {
33+
if (settled) return
34+
settled = true
35+
if (code === 0) {
36+
resolve($.load(''))
37+
return
38+
}
39+
reject(new Error(`loadIframe worker exited unexpectedly with code ${code}`))
40+
}
41+
42+
worker.once('message', onMessage)
43+
worker.once('error', onError)
44+
worker.once('exit', onExit)
2045
return promise
2146
}

packages/metascraper-helpers/test/load-iframe.js

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
'use strict'
22

33
const cheerio = require('cheerio')
4+
const { spawn } = require('child_process')
5+
const path = require('path')
46
const test = require('ava')
57

68
const { loadIframe } = require('..')
@@ -39,3 +41,45 @@ test('markup is correct', async t => {
3941
)
4042
t.snapshot(normalizeTransistorAssetUrls($.html()))
4143
})
44+
45+
// Regression test: runs loadIframe in a separate Node process and fails if
// that process has not exited on its own within 3 seconds.
//
// NOTE(review): `<\\\\/script>` below reaches the child's single-quoted string
// as `<\/script>`, which would not close the script tag — confirm that this is
// the intended markup.
test('worker does not keep process alive after resolving', async t => {
  const script = `
    const cheerio = require('cheerio')
    const { loadIframe } = require('./src')
    ;(async () => {
      const src = 'data:text/html,<html><body><script>setInterval(() => {}, 1000)<\\\\/script></body></html>'
      const $ = cheerio.load(\`<iframe src="\${src}"></iframe>\`)
      await loadIframe('https://example.com', $, { timeout: 200 })
    })().catch(error => {
      console.error(error)
      process.exit(1)
    })
  `

  await new Promise((resolve, reject) => {
    const child = spawn(process.execPath, ['-e', script], {
      cwd: path.resolve(__dirname, '..'),
      stdio: ['ignore', 'ignore', 'pipe']
    })

    // Collect stderr so a failing child can be diagnosed from the message.
    let stderr = ''
    child.stderr.on('data', chunk => {
      stderr += String(chunk)
    })

    // Watchdog: a child that is still alive here is being kept open.
    const timeoutId = setTimeout(() => {
      child.kill('SIGKILL')
      reject(
        new Error('Child process did not exit in time after loadIframe resolve')
      )
    }, 3000)

    // Without this listener a spawn failure would surface as an unhandled
    // 'error' event instead of failing the test, and the watchdog would keep
    // running.
    child.once('error', error => {
      clearTimeout(timeoutId)
      reject(error)
    })

    child.once('exit', code => {
      clearTimeout(timeoutId)
      if (code === 0) return resolve()
      reject(new Error(`Child process failed with code ${code}: ${stderr}`))
    })
  })

  t.pass()
})

0 commit comments

Comments
 (0)