@@ -6,14 +6,21 @@ import { matchesCompiledRulePatterns, matchesRuleTextHints, passesRulePrefilter
66import { isDetectablePageUrl } from '@/utils/page-support'
77import { cleanTechnologyUrl } from '@/utils/url'
88
/**
 * Version stamp stored with cached bundle-license results. A cached entry is
 * only reused when its schemaVersion equals this value (and its signature
 * matches), so bumping it forces a rescan.
 */
const BUNDLE_LICENSE_SCHEMA_VERSION = 2
/** Source label for this detector; the text reads roughly "JS copyright comment". Usage is outside this section. */
const BUNDLE_LICENSE_SOURCE = 'JS 版权注释'
/** NOTE(review): presumably the cap on scripts scanned per page — usage is outside this section. */
const MAX_CANDIDATE_SCRIPTS = 5
/** Size of the initial (offset 0) range requested for every script. */
const MAX_FETCH_BYTES = 384 * 1024
/** Size of each follow-up range sample taken deeper inside a large script. */
const MAX_RANGE_SAMPLE_BYTES = 160 * 1024
/** Total number of bytes a single scan may claim across all of its requests. */
const MAX_TOTAL_SAMPLE_BYTES = 2 * 1024 * 1024
/** Smallest claim worth a request; the scan budget counts as exhausted below this. */
const MIN_SAMPLE_BYTES = 24 * 1024
/** Upper bound on follow-up sample offsets generated for one script. */
const MAX_RANGE_SAMPLES_PER_SCRIPT = 6
/** Upper bound on follow-up samples across all scripts of one scan. */
const MAX_RANGE_SAMPLES_PER_SCAN = 10
/** Byte limit used when fetching a script's sidecar license file. */
const MAX_SIDECAR_BYTES = 160 * 1024
/** Character cap on collected license text. */
const MAX_LICENSE_TEXT_CHARS = 180_000
/** Per-request timeout ceiling in milliseconds. */
const FETCH_TIMEOUT_MS = 6000
/** Wall-clock limit for one whole scan in milliseconds. */
const MAX_SCAN_MS = 8000
/** NOTE(review): presumably the delay before a scheduled scan starts — usage is outside this section. */
const SCAN_DELAY_MS = 1400
/** Relative positions (fraction of the last valid start offset) at which large scripts are sampled. */
const RANGE_SAMPLE_RATIOS = [0.25, 0.5, 0.8, 0.835, 0.9, 1] as const

/** Pending timers keyed by a number (presumably the tab id — confirm against the scheduling code). */
const bundleLicenseTimers = new Map<number, ReturnType<typeof setTimeout>>()
1926
@@ -29,8 +36,42 @@ type ScriptLicenseObservation = {
2936 sidecarUrl ?: string
3037}
3138
/** Outcome of a single ranged text fetch. */
type RangeFetchResult = {
  /** True only when the server answered with HTTP 206 (Partial Content). */
  rangeSupported: boolean
  /** Decoded response text; empty when the request was skipped or failed. */
  text: string
  /** Full resource size parsed from the Content-Range header, when the header provided one. */
  totalBytes?: number
}
44+
/** Mutable allowance shared by every request made during one scan. */
type ScanBudget = {
  /** Epoch-millisecond timestamp (compared against Date.now()) after which the scan stops. */
  deadline: number
  /** Bytes that may still be claimed; reduced each time a fetch claims its share. */
  remainingBytes: number
  /** Follow-up range samples that may still be taken across all scripts. */
  remainingRangeSamples: number
}
50+
/** Drops empty strings, then removes duplicates while keeping first-seen order. */
const unique = (items: readonly string[]): string[] => [...new Set(items.filter(Boolean))]
3352
/**
 * Creates a fresh budget for one scan: the deadline is MAX_SCAN_MS from now,
 * and the byte and range-sample allowances start at their configured maxima.
 */
const createScanBudget = (): ScanBudget => ({
  deadline: Date.now() + MAX_SCAN_MS,
  remainingBytes: MAX_TOTAL_SAMPLE_BYTES,
  remainingRangeSamples: MAX_RANGE_SAMPLES_PER_SCAN
})
58+
/**
 * Reports whether another request is still worthwhile: at least
 * MIN_SAMPLE_BYTES remain in the budget and the deadline has not passed.
 */
const hasScanBudget = (budget: ScanBudget): boolean =>
  budget.remainingBytes >= MIN_SAMPLE_BYTES && Date.now() < budget.deadline
60+
/**
 * Reserves up to `maxBytes` from the scan budget for one request.
 *
 * @param budget Budget to draw from; `remainingBytes` is reduced on success.
 * @param maxBytes Largest number of bytes the caller wants (floored to an integer).
 * @returns The number of bytes reserved, or 0 when the budget is exhausted,
 *   the deadline has passed, or fewer than MIN_SAMPLE_BYTES could be reserved.
 */
const claimFetchBytes = (budget: ScanBudget, maxBytes: number): number => {
  // A NaN request would pass the minimum check below (NaN < x is false) and
  // turn budget.remainingBytes into NaN, so reject it before touching the budget.
  if (Number.isNaN(maxBytes) || !hasScanBudget(budget)) return 0
  const bytes = Math.min(Math.max(1, Math.floor(maxBytes)), budget.remainingBytes)
  if (bytes < MIN_SAMPLE_BYTES) return 0
  budget.remainingBytes -= bytes
  return bytes
}
68+
/**
 * Timeout for the next request: the time left until the scan deadline,
 * capped at FETCH_TIMEOUT_MS and never less than 1 ms.
 */
const remainingTimeoutMs = (budget: ScanBudget): number =>
  Math.max(1, Math.min(FETCH_TIMEOUT_MS, budget.deadline - Date.now()))
70+
/** Resolves on a zero-delay timer, letting other queued tasks run before the caller continues. */
const yieldToEventLoop = (): Promise<void> => new Promise<void>(resolve => setTimeout(resolve, 0))
74+
3475const toAbsoluteHttpUrl = ( value : unknown , baseUrl : string ) : string => {
3576 const text = String ( value || '' ) . trim ( )
3677 if ( ! text ) return ''
@@ -136,27 +177,81 @@ const isTextLikeResponse = (url: string, response: Response): boolean => {
136177 return / j a v a s c r i p t | e c m a s c r i p t | t e x t | p l a i n | o c t e t - s t r e a m / i. test ( contentType )
137178}
138179
139- const fetchLimitedText = async ( url : string , maxBytes : number ) : Promise < string > => {
/**
 * Extracts the total resource size from a Content-Range header value such as
 * `bytes 0-99/1234`.
 *
 * @param value Raw header value, or null when the header is absent.
 * @returns The total as a positive finite number, or undefined when the value
 *   is missing, has no numeric total (e.g. `/*`), or the total is zero.
 */
const parseContentRangeTotal = (value: string | null): number | undefined => {
  // The total is the run of digits after the last slash, allowing trailing whitespace.
  const match = value?.match(/\/(\d+)\s*$/)
  if (!match) return undefined
  const total = Number(match[1])
  return Number.isFinite(total) && total > 0 ? total : undefined
}
186+
/**
 * Fetches one byte window of a text resource, charging the scan budget first.
 *
 * @param url Resource to fetch.
 * @param start Byte offset of the window (floored, clamped to >= 0).
 * @param maxBytes Desired window size; the actual size is whatever the budget grants.
 * @param budget Scan budget; bytes are claimed before the request is sent and
 *   are not refunded if the request fails.
 * @returns The decoded text plus range metadata. `text` is empty and
 *   `rangeSupported` is false when the budget refuses the claim, the response
 *   is not ok or not text-like, the request throws or times out, or the server
 *   ignored a non-zero offset.
 */
const fetchTextRange = async (url: string, start: number, maxBytes: number, budget: ScanBudget): Promise<RangeFetchResult> => {
  const claimedBytes = claimFetchBytes(budget, maxBytes)
  if (!claimedBytes) return { rangeSupported: false, text: '' }

  const controller = new AbortController()
  // The abort timer is bounded by both the per-request cap and the scan deadline.
  const timeout = setTimeout(() => controller.abort(), remainingTimeoutMs(budget))
  const safeStart = Math.max(0, Math.floor(start))
  const safeMaxBytes = Math.max(1, Math.floor(claimedBytes))
  const end = safeStart + safeMaxBytes - 1

  try {
    const response = await fetch(url, {
      cache: 'force-cache',
      credentials: 'omit',
      // A byte range-spec must not contain whitespace around the dash.
      headers: { Range: `bytes=${safeStart}-${end}` },
      signal: controller.signal
    })
    if (!response.ok) return { rangeSupported: false, text: '' }
    if (!isTextLikeResponse(url, response)) return { rangeSupported: false, text: '' }

    const rangeSupported = response.status === 206
    // If the server ignored the Range header, the body starts at byte 0 rather
    // than at the requested offset, so it is not the window that was asked for.
    if (!rangeSupported && safeStart > 0) return { rangeSupported: false, text: '' }

    const totalBytes = parseContentRangeTotal(response.headers.get('content-range'))
    return {
      rangeSupported,
      text: await readLimitedResponseText(response, safeMaxBytes),
      totalBytes
    }
  } catch {
    // Best effort: network errors and aborts yield an empty result.
    return { rangeSupported: false, text: '' }
  } finally {
    clearTimeout(timeout)
  }
}
159220
/**
 * Fetches the first `maxBytes` bytes of `url` (as far as the scan budget
 * allows) and returns only the text; empty when nothing could be fetched.
 */
const fetchLimitedText = async (url: string, maxBytes: number, budget: ScanBudget): Promise<string> =>
  (await fetchTextRange(url, 0, maxBytes, budget)).text
223+
/**
 * Chooses byte offsets for follow-up samples inside a large script.
 *
 * @param totalBytes Full size of the script in bytes.
 * @returns Ascending start offsets, at most MAX_RANGE_SAMPLES_PER_SCRIPT of
 *   them. Empty when the size is not finite or the script is no larger than
 *   the head window plus one sample.
 */
const buildRangeSampleStarts = (totalBytes: number): number[] => {
  if (!Number.isFinite(totalBytes) || totalBytes <= MAX_FETCH_BYTES + MAX_RANGE_SAMPLE_BYTES) return []

  // Last offset from which a full-size sample still fits inside the file.
  const maxStart = Math.max(0, totalBytes - MAX_RANGE_SAMPLE_BYTES)
  const starts: number[] = []
  for (const ratio of RANGE_SAMPLE_RATIOS) {
    const start = ratio >= 1 ? maxStart : Math.floor(maxStart * ratio)
    // Skip offsets that begin inside the head window fetched from offset 0.
    if (start <= MAX_FETCH_BYTES) continue
    // Skip offsets closer than half a sample to one already chosen.
    if (starts.some(item => Math.abs(item - start) < MAX_RANGE_SAMPLE_BYTES / 2)) continue
    starts.push(start)
  }

  return starts.sort((a, b) => a - b).slice(0, MAX_RANGE_SAMPLES_PER_SCRIPT)
}
238+
/**
 * Fetches the head of a script and, when the server supports byte ranges and
 * reports the total size, a few additional windows deeper in the file.
 *
 * @param url Script URL.
 * @param budget Scan budget shared with all other requests of the scan.
 * @returns The fetched chunks joined with newlines; just the head text when
 *   ranges are unsupported or the size is unknown.
 */
const fetchSampledScriptText = async (url: string, budget: ScanBudget): Promise<string> => {
  const head = await fetchTextRange(url, 0, MAX_FETCH_BYTES, budget)
  const chunks = [head.text]
  if (!head.rangeSupported || !head.totalBytes) return chunks.join('\n')

  for (const start of buildRangeSampleStarts(head.totalBytes)) {
    if (!hasScanBudget(budget) || budget.remainingRangeSamples <= 0) break
    budget.remainingRangeSamples -= 1
    await yieldToEventLoop()
    const result = await fetchTextRange(url, start, MAX_RANGE_SAMPLE_BYTES, budget)
    // Keep a sample only when the server honoured the range; otherwise the
    // body would be the start of the file again rather than the requested window.
    if (result.rangeSupported && result.text) chunks.push(result.text)
  }

  return chunks.join('\n')
}
254+
/**
 * Decides whether a comment looks like a license notice: either it starts
 * with the `/*!` marker, or it mentions `@license`, `@preserve`, "copyright",
 * "licensed under" or "license information" (case-insensitive).
 */
const isLicenseComment = (comment: string): boolean =>
  /^\/\*!/.test(comment) || /@(?:license|preserve)|copyright|licensed under|license information/i.test(comment)
162257
@@ -167,22 +262,25 @@ const trimLicenseText = (text: string): string => {
167262
168263const extractLicenseComments = ( source : string ) : string [ ] => {
169264 const comments : string [ ] = [ ]
265+ let commentChars = 0
170266 const blockCommentPattern = / \/ \* [ \s \S ] * ?\* \/ / g
171267 let blockMatch : RegExpExecArray | null
172268
173269 while ( ( blockMatch = blockCommentPattern . exec ( source ) ) ) {
174270 const comment = blockMatch [ 0 ]
175271 if ( isLicenseComment ( comment ) ) {
176272 comments . push ( comment )
273+ commentChars += comment . length + 1
177274 }
178- if ( comments . join ( '\n' ) . length >= MAX_LICENSE_TEXT_CHARS ) break
275+ if ( commentChars >= MAX_LICENSE_TEXT_CHARS ) break
179276 }
180277
181278 const lineCommentPattern = / ^ \s * \/ \/ [ ^ \n ] * (?: @ l i c e n s e | @ p r e s e r v e | c o p y r i g h t | l i c e n s e ) [ ^ \n ] * (?: \n \s * \/ \/ [ ^ \n ] * ) { 0 , 8 } / gim
182279 let lineMatch : RegExpExecArray | null
183280 while ( ( lineMatch = lineCommentPattern . exec ( source ) ) ) {
184281 comments . push ( lineMatch [ 0 ] )
185- if ( comments . join ( '\n' ) . length >= MAX_LICENSE_TEXT_CHARS ) break
282+ commentChars += lineMatch [ 0 ] . length + 1
283+ if ( commentChars >= MAX_LICENSE_TEXT_CHARS ) break
186284 }
187285
188286 return comments
@@ -200,11 +298,12 @@ const buildSidecarLicenseUrl = (scriptUrl: string): string => {
200298 }
201299}
202300
203- const scanScriptLicense = async ( scriptUrl : string ) : Promise < ScriptLicenseObservation | null > => {
204- const source = await fetchLimitedText ( scriptUrl , MAX_FETCH_BYTES )
205- const comments = source ? extractLicenseComments ( source ) : [ ]
301+ const scanScriptLicense = async ( scriptUrl : string , budget : ScanBudget ) : Promise < ScriptLicenseObservation | null > => {
302+ const source = await fetchSampledScriptText ( scriptUrl , budget )
303+ const comments = unique ( source ? extractLicenseComments ( source ) : [ ] )
206304 const sidecarUrl = buildSidecarLicenseUrl ( scriptUrl )
207- const sidecarText = sidecarUrl ? await fetchLimitedText ( sidecarUrl , MAX_SIDECAR_BYTES ) : ''
305+ const sidecarText =
306+ sidecarUrl && comments . length < 12 && hasScanBudget ( budget ) ? await fetchLimitedText ( sidecarUrl , MAX_SIDECAR_BYTES , budget ) : ''
208307 const text = trimLicenseText ( [ ...comments , sidecarText ] . filter ( Boolean ) . join ( '\n\n' ) )
209308
210309 if ( ! text ) return null
@@ -260,7 +359,15 @@ export const runBundleLicenseDetection = async (tabId: number): Promise<void> =>
260359 if ( ! signature ) return
261360 if ( data . bundle ?. schemaVersion === BUNDLE_LICENSE_SCHEMA_VERSION && data . bundle ?. signature === signature ) return
262361
263- const observations = ( await Promise . all ( scripts . map ( script => scanScriptLicense ( script ) ) ) ) . filter ( Boolean ) as ScriptLicenseObservation [ ]
362+ const budget = createScanBudget ( )
363+ const observations : ScriptLicenseObservation [ ] = [ ]
364+ for ( const script of scripts ) {
365+ if ( ! hasScanBudget ( budget ) ) break
366+ const observation = await scanScriptLicense ( script , budget )
367+ if ( observation ) observations . push ( observation )
368+ await yieldToEventLoop ( )
369+ }
370+
264371 const technologies = detectTechnologiesFromLicenseText ( observations , pageRules . bundleLicenseLibraries || [ ] )
265372
266373 data . bundle = {
0 commit comments