Skip to content

Commit a54cc73

Browse files
authored
perf(video): share iframe fetch cache across rules (#817)
1 parent bb90b1c commit a54cc73

2 files changed

Lines changed: 57 additions & 4 deletions

File tree

packages/metascraper-video/src/index.js

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,45 @@ const imageRules = [toUrl($ => $('video').attr('poster'))]
7070
// Default iframe loader: builds a document containing a single
// `<iframe src="...">` element via `$.load` and passes it, together with the
// page `url`, to `loadIframe`.
// NOTE(review): `src` is interpolated into the markup unescaped — presumably
// callers only pass URL strings; confirm against call sites.
const _getIframe = (url, $, { src }) =>
7171
loadIframe(url, $.load(`<iframe src="${src}"></iframe>`))
7272

73+
/**
 * Wrap an iframe loader with a cache so that repeated requests for the same
 * `src` against the same parent document reuse a single load.
 *
 * The cache has two levels: a WeakMap keyed by the parent document handle
 * (`$`), holding a Map keyed by `src`. The stored value is the in-flight
 * promise itself, so callers that arrive while a load is still pending share
 * it instead of starting a second one.
 *
 * @param {Function} getIframe - loader called as `getIframe(url, $, { src })`.
 *   It may return a promise or a plain value, and may throw synchronously.
 * @returns {Function} async `(url, $, src)` resolving to whatever `getIframe`
 *   produced; rejects with the loader's error when the load fails.
 */
const createGetIframeCached = getIframe => {
  // Keyed by the parent document handle; a WeakMap does not keep that key alive.
  const cacheByHtmlDom = new WeakMap()

  return async (url, $, src) => {
    let cacheBySrc = cacheByHtmlDom.get($)
    if (!cacheBySrc) {
      cacheBySrc = new Map()
      cacheByHtmlDom.set($, cacheBySrc)
    }

    if (cacheBySrc.has(src)) return cacheBySrc.get(src)

    // Call the loader through an async wrapper: a loader that returns a plain
    // value or throws synchronously still yields a promise, so `.catch` is
    // always available and failures always reach the eviction handler below.
    const pendingHtmlDom = (async () => getIframe(url, $, { src }))().catch(error => {
      // Drop failed loads so a later call can retry instead of replaying the error.
      cacheBySrc.delete(src)
      throw error
    })

    cacheBySrc.set(src, pendingHtmlDom)
    return pendingHtmlDom
  }
}
95+
7396
const withIframe = (rules, getIframe) =>
7497
rules.concat(
7598
async ({ htmlDom: $, url }) => {
7699
const srcs = $('iframe[src^="http"], iframe[src^="/"]')
77100
.map((_, element) => $(element).attr('src'))
78101
.get()
79102
if (srcs.length === 0) return
103+
const seenSrcs = new Set()
80104
for (const src of srcs) {
81105
try {
82106
const normalizedSrc = normalizeUrl(url, src)
83107
if (!normalizedSrc) continue
84-
const htmlDom = await getIframe(url, $, { src: normalizedSrc })
108+
if (seenSrcs.has(normalizedSrc)) continue
109+
seenSrcs.add(normalizedSrc)
110+
111+
const htmlDom = await getIframe(url, $, normalizedSrc)
85112
const result = await findRule(rules, { htmlDom, url })
86113
if (has(result)) return result
87114
} catch (_) {}
@@ -91,17 +118,18 @@ const withIframe = (rules, getIframe) =>
91118
const src = $('meta[name="twitter:player"]').attr('content')
92119
return src
93120
? findRule(rules, {
94-
htmlDom: await getIframe(url, $, { src }),
121+
htmlDom: await getIframe(url, $, src),
95122
url
96123
})
97124
: undefined
98125
}
99126
)
100127

101128
module.exports = ({ getIframe = _getIframe } = {}) => {
129+
const getIframeCached = createGetIframeCached(getIframe)
102130
const rules = {
103-
image: withIframe(imageRules, getIframe),
104-
video: withIframe(videoRules, getIframe)
131+
image: withIframe(imageRules, getIframeCached),
132+
video: withIframe(videoRules, getIframeCached)
105133
}
106134

107135
rules.pkgName = 'metascraper-video'

packages/metascraper-video/test/iframe.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,28 @@ test('stop iframe probing after first video match', async t => {
6868
t.is(metadata.video, 'https://cdn.microlink.io/file-examples/sample.mp4')
6969
t.deepEqual(calls, ['https://example.com/ok'])
7070
})
71+
72+
test('reuse iframe fetch across image and video extraction', async t => {
73+
let calls = 0
74+
const metascraper = createMetascraper({
75+
getIframe: async (url, $, { src }) => {
76+
calls += 1
77+
t.is(src, 'https://example.com/ok')
78+
return cheerio.load(`
79+
<video
80+
poster="https://cdn.microlink.io/file-examples/sample.png"
81+
src="https://cdn.microlink.io/file-examples/sample.mp4"
82+
></video>
83+
`)
84+
}
85+
})
86+
87+
const metadata = await metascraper({
88+
url: 'https://example.com',
89+
html: '<iframe src="/ok"></iframe>'
90+
})
91+
92+
t.is(metadata.image, 'https://cdn.microlink.io/file-examples/sample.png')
93+
t.is(metadata.video, 'https://cdn.microlink.io/file-examples/sample.mp4')
94+
t.is(calls, 1)
95+
})

0 commit comments

Comments
 (0)