Skip to content

Commit ac8fc2a

Browse files
authored
perf(audio,video): short-circuit iframe probing on first hit (#810)
1 parent c7317ac commit ac8fc2a

4 files changed

Lines changed: 63 additions & 23 deletions

File tree

packages/metascraper-audio/src/index.js

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ const {
1212
} = require('@metascraper/helpers')
1313

1414
const { find, chain, isEqual } = require('lodash')
15-
const pReflect = require('p-reflect')
1615

1716
const toAudio = toRule(audio)
1817

@@ -88,16 +87,13 @@ module.exports = ({ getIframe = _getIframe } = {}) => {
8887
.map(src => normalizeUrl(url, src))
8988
]
9089
if (srcs.length === 0) return
91-
return pReflect(
92-
Promise.any(
93-
srcs.map(async src => {
94-
const htmlDom = await getIframe(url, $, { src })
95-
const result = await findRule(audioRules, { htmlDom, url })
96-
if (!has(result)) throw TypeError('no result')
97-
return result
98-
})
99-
)
100-
).then(({ value }) => value)
90+
for (const src of srcs) {
91+
try {
92+
const htmlDom = await getIframe(url, $, { src })
93+
const result = await findRule(audioRules, { htmlDom, url })
94+
if (has(result)) return result
95+
} catch (_) {}
96+
}
10197
},
10298
async ({ htmlDom: $, url }) => {
10399
const src = $('meta[name="twitter:player"]').attr('content')

packages/metascraper-audio/test/iframe.js

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
'use strict'
22

3+
const cheerio = require('cheerio')
34
const test = require('ava')
45

56
const { runServer } = require('./helpers')
@@ -43,3 +44,26 @@ test('ignore non http urls', async t => {
4344
const metadata = await metascraper({ html, url })
4445
t.is(metadata.audio, null)
4546
})
47+
48+
test('stop iframe probing after first audio match', async t => {
49+
const calls = []
50+
const metascraper = createMetascraper({
51+
getIframe: async (url, $, { src }) => {
52+
calls.push(src)
53+
if (src.endsWith('/ok')) {
54+
return cheerio.load(
55+
'<meta property="og:audio" content="https://cdn.microlink.io/file-examples/sample.mp3">'
56+
)
57+
}
58+
throw new Error('should not be called')
59+
}
60+
})
61+
62+
const metadata = await metascraper({
63+
url: 'https://example.com',
64+
html: '<iframe src="/ok"></iframe><iframe src="/skip"></iframe>'
65+
})
66+
67+
t.is(metadata.audio, 'https://cdn.microlink.io/file-examples/sample.mp3')
68+
t.deepEqual(calls, ['https://example.com/ok'])
69+
})

packages/metascraper-video/src/index.js

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ const {
1111
video
1212
} = require('@metascraper/helpers')
1313

14-
const pReflect = require('p-reflect')
15-
1614
const { chain, find, isEqual } = require('lodash')
1715

1816
const toUrl = toRule(urlFn)
@@ -87,16 +85,13 @@ const withIframe = (rules, getIframe) =>
8785
.map(src => normalizeUrl(url, src))
8886
]
8987
if (srcs.length === 0) return
90-
return pReflect(
91-
Promise.any(
92-
srcs.map(async src => {
93-
const htmlDom = await getIframe(url, $, { src })
94-
const result = await findRule(rules, { htmlDom, url })
95-
if (!has(result)) throw TypeError('no result')
96-
return result
97-
})
98-
)
99-
).then(({ value }) => value)
88+
for (const src of srcs) {
89+
try {
90+
const htmlDom = await getIframe(url, $, { src })
91+
const result = await findRule(rules, { htmlDom, url })
92+
if (has(result)) return result
93+
} catch (_) {}
94+
}
10095
},
10196
async ({ htmlDom: $, url }) => {
10297
const src = $('meta[name="twitter:player"]').attr('content')

packages/metascraper-video/test/iframe.js

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
'use strict'
22

3+
const cheerio = require('cheerio')
34
const test = require('ava')
45

56
const { runServer } = require('./helpers')
@@ -43,3 +44,27 @@ test('ignore non http urls', async t => {
4344
const metadata = await metascraper({ html, url })
4445
t.is(metadata.video, null)
4546
})
47+
48+
test('stop iframe probing after first video match', async t => {
49+
const calls = []
50+
const metascraper = createMetascraper({
51+
getIframe: async (url, $, { src }) => {
52+
calls.push(src)
53+
if (src.endsWith('/ok')) {
54+
return cheerio.load(
55+
'<meta property="og:video" content="https://cdn.microlink.io/file-examples/sample.mp4">'
56+
)
57+
}
58+
throw new Error('should not be called')
59+
}
60+
})
61+
62+
const metadata = await metascraper({
63+
url: 'https://example.com',
64+
html: '<iframe src="/ok"></iframe><iframe src="/skip"></iframe>',
65+
pickPropNames: new Set(['video'])
66+
})
67+
68+
t.is(metadata.video, 'https://cdn.microlink.io/file-examples/sample.mp4')
69+
t.deepEqual(calls, ['https://example.com/ok'])
70+
})

0 commit comments

Comments
 (0)