@@ -70,18 +70,45 @@ const imageRules = [toUrl($ => $('video').attr('poster'))]
/**
 * Default iframe getter: wraps `src` in a minimal `<iframe>` document built
 * with `$.load` and hands that document to `loadIframe`.
 *
 * @param {string} url - URL of the page being scraped; forwarded to `loadIframe`.
 * @param {object} $ - DOM handle exposing a `load(html)` method.
 * @param {{ src: string }} opts - `src` is the iframe URL to embed.
 * @returns {*} Whatever `loadIframe` returns (presumably a promise of the
 *   iframe's DOM — confirm against `loadIframe`).
 */
const _getIframe = (url, $, { src }) => {
  // `src` comes from scraped markup, so escape the characters that could
  // terminate the attribute or be decoded as an entity by the HTML parser.
  const escapedSrc = String(src).replace(/&/g, '&amp;').replace(/"/g, '&quot;')
  return loadIframe(url, $.load(`<iframe src="${escapedSrc}"></iframe>`))
}
7272
/**
 * Wraps an iframe getter with a per-document, per-src cache so the same
 * iframe is requested at most once for a given DOM handle.
 *
 * The outer cache is a WeakMap keyed by the DOM handle (`$`), so entries do
 * not keep a handle alive on their own. The inner Map stores the in-flight or
 * settled promise per `src`, which lets concurrent callers share one request.
 * A failed request is evicted so that a later call can retry.
 *
 * @param {Function} getIframe - `(url, $, { src }) => htmlDom | Promise<htmlDom>`.
 *   May return a plain value or throw synchronously; both are normalised to a
 *   promise.
 * @returns {(url: string, $: object, src: string) => Promise<*>} Cached getter.
 *   Note that it takes `src` positionally, not wrapped in an options object.
 */
const createGetIframeCached = getIframe => {
  const cacheByHtmlDom = new WeakMap()

  return async (url, $, src) => {
    let cacheBySrc = cacheByHtmlDom.get($)
    if (cacheBySrc === undefined) {
      cacheBySrc = new Map()
      cacheByHtmlDom.set($, cacheBySrc)
    }

    if (cacheBySrc.has(src)) return cacheBySrc.get(src)

    // The async IIFE still invokes `getIframe` synchronously, but converts a
    // plain return value or a synchronous throw into a promise so the
    // eviction handler below always applies.
    const pendingHtmlDom = (async () => getIframe(url, $, { src }))().catch(
      error => {
        // Drop the failed entry so the next call for this src retries.
        cacheBySrc.delete(src)
        throw error
      }
    )

    cacheBySrc.set(src, pendingHtmlDom)
    return pendingHtmlDom
  }
}
95+
7396const withIframe = ( rules , getIframe ) =>
7497 rules . concat (
7598 async ( { htmlDom : $ , url } ) => {
7699 const srcs = $ ( 'iframe[src^="http"], iframe[src^="/"]' )
77100 . map ( ( _ , element ) => $ ( element ) . attr ( 'src' ) )
78101 . get ( )
79102 if ( srcs . length === 0 ) return
103+ const seenSrcs = new Set ( )
80104 for ( const src of srcs ) {
81105 try {
82106 const normalizedSrc = normalizeUrl ( url , src )
83107 if ( ! normalizedSrc ) continue
84- const htmlDom = await getIframe ( url , $ , { src : normalizedSrc } )
108+ if ( seenSrcs . has ( normalizedSrc ) ) continue
109+ seenSrcs . add ( normalizedSrc )
110+
111+ const htmlDom = await getIframe ( url , $ , normalizedSrc )
85112 const result = await findRule ( rules , { htmlDom, url } )
86113 if ( has ( result ) ) return result
87114 } catch ( _ ) { }
@@ -91,17 +118,18 @@ const withIframe = (rules, getIframe) =>
91118 const src = $ ( 'meta[name="twitter:player"]' ) . attr ( 'content' )
92119 return src
93120 ? findRule ( rules , {
94- htmlDom : await getIframe ( url , $ , { src } ) ,
121+ htmlDom : await getIframe ( url , $ , src ) ,
95122 url
96123 } )
97124 : undefined
98125 }
99126 )
100127
101128module . exports = ( { getIframe = _getIframe } = { } ) => {
129+ const getIframeCached = createGetIframeCached ( getIframe )
102130 const rules = {
103- image : withIframe ( imageRules , getIframe ) ,
104- video : withIframe ( videoRules , getIframe )
131+ image : withIframe ( imageRules , getIframeCached ) ,
132+ video : withIframe ( videoRules , getIframeCached )
105133 }
106134
107135 rules . pkgName = 'metascraper-video'
0 commit comments