@@ -11,35 +11,29 @@ const {
1111 toRule
1212} = require ( '@metascraper/helpers' )
1313
14- const { find, chain, isEqual } = require ( 'lodash' )
15-
1614const toAudio = toRule ( audio )
1715
/**
 * Derive a short media subtype (e.g. `mp3`, `ogg`) from a DOM node's
 * `type` attribute.
 *
 * The attribute is read as a MIME type: anything after the first `;`
 * (parameters such as `codecs=`) is dropped and the part after `/` is used.
 * Both `mpeg` and `mp4` are reported as `mp3`.
 *
 * @param {{ attribs?: { type?: string } }} [domNode] Node whose `attribs.type` is inspected.
 * @returns {string|undefined} Lower-cased subtype, or `undefined` when the
 *   node, its attributes, or a usable `type/subtype` value is missing.
 */
const getMediaType = domNode => {
  const type = domNode?.attribs?.type
  if (typeof type !== 'string') return undefined

  // `split` always yields at least one element, so no guard is needed here.
  const [essence] = type.split(';')

  // MIME types are case-insensitive and may carry whitespace before `;`,
  // so normalize before comparing.
  const mediaType = essence.split('/')[1]?.trim().toLowerCase()
  if (!mediaType) return undefined

  // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
  // mp4 is commonly used for video; for audio sources it is treated as mp3.
  if (mediaType === 'mpeg' || mediaType === 'mp4') return 'mp3'
  return mediaType
}
24+
/**
 * Mapper, wrapped with `toRule`, that walks a list of DOM nodes and returns
 * the first defined value produced by `audio` for a node's `src` attribute.
 *
 * Each node contributes a `(src, type)` pair, where `type` comes from
 * `getMediaType`. Pairs already tried are skipped so `audio` is called at
 * most once per distinct pair.
 *
 * NOTE(review): the exact contracts of `toRule` and `audio` live in
 * `@metascraper/helpers` and are not visible here — confirm that `undefined`
 * is the only "no match" value `audio` returns.
 *
 * @param {Array<{ attribs?: { src?: string, type?: string } }>} domNodes Candidate nodes.
 * @param {object} [opts] Extra options forwarded to `audio`; they override `type` on key clash.
 * @returns {*} The first non-`undefined` result of `audio`, otherwise `undefined`.
 */
const toAudioFromDom = toRule((domNodes, opts) => {
  const seen = new Set()

  // Tolerate a missing node list instead of throwing on iteration.
  for (const domNode of domNodes ?? []) {
    const src = domNode?.attribs?.src
    const type = getMediaType(domNode)

    // De-duplicate on the (src, type) pair before calling `audio`.
    const key = `${src}::${type}`
    if (seen.has(key)) continue
    seen.add(key)

    const result = audio(src, { type, ...opts })
    if (result !== undefined) return result
  }

  return undefined
})
4438
4539const audioRules = [
@@ -80,16 +74,15 @@ module.exports = ({ getIframe = _getIframe } = {}) => {
8074 const rules = {
8175 audio : audioRules . concat (
8276 async ( { htmlDom : $ , url } ) => {
83- const srcs = [
84- ...new $ ( 'iframe[src^="http"], iframe[src^="/"]' )
85- . map ( ( _ , element ) => $ ( element ) . attr ( 'src' ) )
86- . get ( )
87- . map ( src => normalizeUrl ( url , src ) )
88- ]
77+ const srcs = $ ( 'iframe[src^="http"], iframe[src^="/"]' )
78+ . map ( ( _ , element ) => $ ( element ) . attr ( 'src' ) )
79+ . get ( )
8980 if ( srcs . length === 0 ) return
9081 for ( const src of srcs ) {
9182 try {
92- const htmlDom = await getIframe ( url , $ , { src } )
83+ const normalizedSrc = normalizeUrl ( url , src )
84+ if ( ! normalizedSrc ) continue
85+ const htmlDom = await getIframe ( url , $ , { src : normalizedSrc } )
9386 const result = await findRule ( audioRules , { htmlDom, url } )
9487 if ( has ( result ) ) return result
9588 } catch ( _ ) { }
0 commit comments