Skip to content

Commit c175eb4

Browse files
authored
perf(audio,video): replace lodash media scans with native loops (#811)
1 parent 64aa483 commit c175eb4

2 files changed

Lines changed: 47 additions & 60 deletions

File tree

packages/metascraper-audio/src/index.js

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,35 +11,29 @@ const {
1111
toRule
1212
} = require('@metascraper/helpers')
1313

14-
const { find, chain, isEqual } = require('lodash')
15-
1614
const toAudio = toRule(audio)
1715

16+
/**
 * Derive the audio format label from a DOM node's `type` attribute.
 *
 * The attribute is treated as a MIME type: parameters after `;` are dropped
 * and the subtype (the part after `/`) is returned, trimmed and lowercased
 * because MIME types are case-insensitive and may carry optional whitespace
 * around the `;` separator.
 *
 * @param {{ attribs?: { type?: string } } | null | undefined} domNode
 *   Node whose `attribs.type` is inspected. A missing node, missing
 *   `attribs`, or missing/empty `type` all yield `undefined`.
 * @returns {string | undefined} `'mp3'` for the `mpeg` and `mp4` subtypes,
 *   otherwise the normalized subtype, or `undefined` when none can be read.
 */
const getMediaType = domNode => {
  // `attribs?.` keeps nodes without an attribute map from throwing.
  const type = domNode?.attribs?.type
  if (!type) return
  // Keep only the "type/subtype" part, e.g. `audio/mpeg; codecs="mp3"`.
  const [essence] = type.split(';')
  const mediaType = essence.split('/')[1]?.trim().toLowerCase()
  if (!mediaType) return
  // `audio/mpeg` is the MIME type used for MP3; `mp4` is aliased to mp3 too
  // because the mp4 label is commonly used for video.
  if (mediaType === 'mpeg' || mediaType === 'mp4') return 'mp3'
  return mediaType
}
24+
1825
const toAudioFromDom = toRule((domNodes, opts) => {
19-
const values = chain(domNodes)
20-
.map(domNode => ({
21-
src: domNode?.attribs.src,
22-
type: chain(domNode)
23-
.get('attribs.type')
24-
.split(';')
25-
.get(0)
26-
.split('/')
27-
.get(1)
28-
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
29-
.replace('mpeg', 'mp3')
30-
/* mp4 is commonly used for video */
31-
.replace('mp4', 'mp3')
32-
.value()
33-
}))
34-
.uniqWith(isEqual)
35-
.value()
26+
const seen = new Set()
27+
for (const domNode of domNodes) {
28+
const src = domNode?.attribs.src
29+
const type = getMediaType(domNode)
30+
const key = `${src}::${type}`
31+
if (seen.has(key)) continue
32+
seen.add(key)
3633

37-
let result
38-
find(
39-
values,
40-
({ src, type }) => (result = audio(src, Object.assign({ type }, opts)))
41-
)
42-
return result
34+
const result = audio(src, { type, ...opts })
35+
if (result !== undefined) return result
36+
}
4337
})
4438

4539
const audioRules = [
@@ -80,16 +74,15 @@ module.exports = ({ getIframe = _getIframe } = {}) => {
8074
const rules = {
8175
audio: audioRules.concat(
8276
async ({ htmlDom: $, url }) => {
83-
const srcs = [
84-
...new $('iframe[src^="http"], iframe[src^="/"]')
85-
.map((_, element) => $(element).attr('src'))
86-
.get()
87-
.map(src => normalizeUrl(url, src))
88-
]
77+
const srcs = $('iframe[src^="http"], iframe[src^="/"]')
78+
.map((_, element) => $(element).attr('src'))
79+
.get()
8980
if (srcs.length === 0) return
9081
for (const src of srcs) {
9182
try {
92-
const htmlDom = await getIframe(url, $, { src })
83+
const normalizedSrc = normalizeUrl(url, src)
84+
if (!normalizedSrc) continue
85+
const htmlDom = await getIframe(url, $, { src: normalizedSrc })
9386
const result = await findRule(audioRules, { htmlDom, url })
9487
if (has(result)) return result
9588
} catch (_) {}

packages/metascraper-video/src/index.js

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,28 @@ const {
1111
video
1212
} = require('@metascraper/helpers')
1313

14-
const { chain, find, isEqual } = require('lodash')
15-
1614
const toUrl = toRule(urlFn)
1715

1816
const toVideo = toRule(video)
1917

18+
/**
 * Derive the video format label from a DOM node's `type` attribute.
 *
 * The attribute is treated as a MIME type: parameters after `;` are dropped
 * and the subtype (the part after `/`) is returned, trimmed and lowercased
 * because MIME types are case-insensitive and may carry optional whitespace
 * around the `;` separator.
 *
 * @param {{ attribs?: { type?: string } } | null | undefined} domNode
 *   Node whose `attribs.type` is inspected. A missing node, missing
 *   `attribs`, or missing/empty `type` all yield `undefined`.
 * @returns {string | undefined} The normalized subtype (e.g. `'mp4'`), or
 *   `undefined` when no subtype can be read.
 */
const getMediaType = domNode => {
  // `attribs?.` keeps nodes without an attribute map from throwing.
  const type = domNode?.attribs?.type
  if (!type) return
  // Keep only the "type/subtype" part, e.g. `video/mp4; codecs="avc1"`.
  const [essence] = type.split(';')
  const mediaType = essence.split('/')[1]?.trim().toLowerCase()
  // An empty subtype (e.g. `video/`) is reported as "no type".
  return mediaType || undefined
}
23+
2024
const toVideoFromDom = toRule((domNodes, opts) => {
21-
const values = chain(domNodes)
22-
.map(domNode => ({
23-
src: domNode?.attribs.src,
24-
type: chain(domNode)
25-
.get('attribs.type')
26-
.split(';')
27-
.get(0)
28-
.split('/')
29-
.get(1)
30-
.value()
31-
}))
32-
.uniqWith(isEqual)
33-
.value()
25+
const seen = new Set()
26+
for (const domNode of domNodes) {
27+
const src = domNode?.attribs.src
28+
const type = getMediaType(domNode)
29+
const key = `${src}::${type}`
30+
if (seen.has(key)) continue
31+
seen.add(key)
3432

35-
let result
36-
find(
37-
values,
38-
({ src, type }) => (result = video(src, Object.assign({ type }, opts)))
39-
)
40-
return result
33+
const result = video(src, { type, ...opts })
34+
if (result !== undefined) return result
35+
}
4136
})
4237

4338
const videoRules = [
@@ -78,16 +73,15 @@ const _getIframe = (url, $, { src }) =>
7873
const withIframe = (rules, getIframe) =>
7974
rules.concat(
8075
async ({ htmlDom: $, url }) => {
81-
const srcs = [
82-
...new $('iframe[src^="http"], iframe[src^="/"]')
83-
.map((_, element) => $(element).attr('src'))
84-
.get()
85-
.map(src => normalizeUrl(url, src))
86-
]
76+
const srcs = $('iframe[src^="http"], iframe[src^="/"]')
77+
.map((_, element) => $(element).attr('src'))
78+
.get()
8779
if (srcs.length === 0) return
8880
for (const src of srcs) {
8981
try {
90-
const htmlDom = await getIframe(url, $, { src })
82+
const normalizedSrc = normalizeUrl(url, src)
83+
if (!normalizedSrc) continue
84+
const htmlDom = await getIframe(url, $, { src: normalizedSrc })
9185
const result = await findRule(rules, { htmlDom, url })
9286
if (has(result)) return result
9387
} catch (_) {}

0 commit comments

Comments
 (0)