@@ -3,24 +3,27 @@ import { buildEffectivePageRules, loadDetectorSettings, loadTechRules } from './
33import { mergeTechnologyRecords , shortHeaderUrl } from './merge'
44import { getTabData , getTabSnapshot , updateBadgeForTab , writeTabData } from './tab-store'
55import { matchesCompiledRulePatterns , matchesRuleTextHints } from './rule-matcher'
6+ import { withTabWriteLock } from './tab-write-lock'
67import { isDetectablePageUrl } from '@/utils/page-support'
78import { cleanTechnologyUrl } from '@/utils/url'
89
9- const BUNDLE_LICENSE_SCHEMA_VERSION = 4
10+ const BUNDLE_LICENSE_SCHEMA_VERSION = 7
1011const BUNDLE_LICENSE_SOURCE = 'JS 版权注释'
1112const MAX_CANDIDATE_SCRIPTS = 5
1213const MAX_FETCH_BYTES = 384 * 1024
1314const MAX_RANGE_SAMPLE_BYTES = 160 * 1024
14- const MAX_TOTAL_SAMPLE_BYTES = 2 * 1024 * 1024
15+ // 5 candidates × 384KB head = 1.92MB, plus range samples for large index-style files; the budget has to be 4MB to fit
16+ const MAX_TOTAL_SAMPLE_BYTES = 4 * 1024 * 1024
1517const MIN_SAMPLE_BYTES = 24 * 1024
16- const MAX_RANGE_SAMPLES_PER_SCRIPT = 6
17- const MAX_RANGE_SAMPLES_PER_SCAN = 10
18+ // Reduced to 3: for OAuth URLs / embedded licenses, the first 384KB segment plus 3 range samples is enough, leaving budget for the other candidates
19+ const MAX_RANGE_SAMPLES_PER_SCRIPT = 3
20+ const MAX_RANGE_SAMPLES_PER_SCAN = 8
1821const MAX_SIDECAR_BYTES = 160 * 1024
1922const MAX_LICENSE_TEXT_CHARS = 180_000
2023const FETCH_TIMEOUT_MS = 6000
21- const MAX_SCAN_MS = 8000
22- const SCAN_DELAY_MS = 1400
23- const RANGE_SAMPLE_RATIOS = [ 0.25 , 0.5 , 0.8 , 0.835 , 0.9 , 1 ] as const
24+ const MAX_SCAN_MS = 12000
25+ const SCAN_DELAY_MS = 600
26+ const RANGE_SAMPLE_RATIOS = [ 0.25 , 0.5 , 1 ] as const
2427
2528const bundleLicenseTimers = new Map < number , ReturnType < typeof setTimeout > > ( )
2629
@@ -34,6 +37,11 @@ type ScriptLicenseObservation = {
3437 commentCount : number
3538 text : string
3639 sidecarUrl ?: string
40+ // JS 包体里嵌入的 OAuth / SSO / 登录端点 URL:SPA 把这种 URL 写在 bundle 里,
41+ // HTML / 资源列表 / 响应头里看不到,需要单独走第三方登录规则匹配
42+ embeddedAuthUrls ?: string [ ]
43+ // 实际拉到的字节数:用来在 popup 原始线索里诊断脚本是否真的被 fetch 到了
44+ sourceLength ?: number
3745}
3846
3947type RangeFetchResult = {
@@ -227,8 +235,10 @@ const fetchTextRange = async (url: string, start: number, maxBytes: number, budg
227235 cache : 'force-cache' ,
228236 credentials : 'omit' ,
229237 headers : { Range : `bytes=${ safeStart } -${ end } ` } ,
230- signal : controller . signal
231- } )
238+ signal : controller . signal ,
239+ // 让 Chrome 把 bundle 扫描的请求排到页面资源后面,不抢用户的关键带宽
240+ priority : 'low'
241+ } as RequestInit )
232242 if ( ! response . ok ) return { rangeSupported : false , text : '' }
233243 if ( ! isTextLikeResponse ( url , response ) ) return { rangeSupported : false , text : '' }
234244
@@ -290,6 +300,23 @@ const trimLicenseText = (text: string): string => {
290300
// Returns true when the text begins (after optional leading whitespace) with an HTML doctype or an <html tag; the check is case-insensitive.
291301const looksLikeHtmlDocument = ( text : string ) : boolean => / ^ \s * (?: < ! d o c t y p e \s + h t m l | < h t m l [ \s > ] ) / i. test ( text )
292302
// Matches OAuth / SSO-shaped URLs embedded in a JS bundle, typically the callback
// behind a "Sign in with X" button in an SPA. The extracted URLs are later run
// through the third-party login rules, covering a blind spot that the HTML,
// the resource list and the response headers cannot reveal.
//
// The host part accepts any dotted hostname, including short ones such as
// `x.com`; a minimum of three characters before the TLD would silently skip them.
const EMBEDDED_AUTH_URL_PATTERN =
  /https?:\/\/[a-z0-9][a-z0-9.-]*\.[a-z]{2,}\/(?:[^\s'"`<>]*?\/)?(?:oauth2?|authorize|connect|sso|openid[-_\/]?connect|saml|signin|sign-in|login\/oauth)\b[^\s'"`<>)\]}]{0,200}/gi

// Punctuation that may trail a URL in source text but is not part of it.
const EMBEDDED_AUTH_URL_TRAILING_PUNCTUATION = /[)\];,.}'`"]+$/

/**
 * Collects the distinct OAuth / SSO-style URLs found in a script's text.
 *
 * @param source Raw (possibly sampled) script text; an empty string yields `[]`.
 * @returns Unique URLs in first-seen order, with trailing punctuation stripped.
 *   URLs shorter than 14 or longer than 240 characters are ignored, and the
 *   scan stops once 60 URLs have been collected.
 */
const extractEmbeddedAuthUrls = (source: string): string[] => {
  if (!source) return []
  const seen = new Set<string>()
  // matchAll works on a clone of the global regex, so the shared pattern's lastIndex is never mutated
  for (const match of source.matchAll(EMBEDDED_AUTH_URL_PATTERN)) {
    const url = match[0].replace(EMBEDDED_AUTH_URL_TRAILING_PUNCTUATION, '').trim()
    if (url.length < 14 || url.length > 240) continue
    seen.add(url)
    if (seen.size >= 60) break
  }
  return [...seen]
}
319+
293320const extractLicenseComments = ( source : string ) : string [ ] => {
294321 const comments : string [ ] = [ ]
295322 let commentChars = 0
@@ -336,17 +363,22 @@ const fetchSidecarLicenseText = async (sidecarUrl: string, budget: ScanBudget):
336363
// Scans one candidate script: fetches sampled text, extracts de-duplicated license
// comments and embedded auth URLs, and optionally fetches the sidecar license file
// (only when a sidecar URL exists, fewer than 12 comments were found, and scan budget remains).
// Returns null only when no bytes, no license text and no auth URLs were obtained.
337364const scanScriptLicense = async ( scriptUrl : string , budget : ScanBudget ) : Promise < ScriptLicenseObservation | null > => {
338365 const source = await fetchSampledScriptText ( scriptUrl , budget )
366+ const sourceLength = source ?. length || 0
339367 const comments = unique ( source ? extractLicenseComments ( source ) : [ ] )
368+ const embeddedAuthUrls = source ? extractEmbeddedAuthUrls ( source ) : [ ]
340369 const sidecarUrl = buildSidecarLicenseUrl ( scriptUrl )
341370 const sidecarText = sidecarUrl && comments . length < 12 && hasScanBudget ( budget ) ? await fetchSidecarLicenseText ( sidecarUrl , budget ) : ''
342371 const text = trimLicenseText ( [ ...comments , sidecarText ] . filter ( Boolean ) . join ( '\n\n' ) )
343372
344- if ( ! text ) return null
373+ // Keep the observation whenever any bytes, license text or auth URLs were obtained, so the raw-clue view can show what was fetched.
373+ // NOTE(review): a script that yielded zero bytes and no sidecar text still returns null here — confirm whether fetch failures were meant to be surfaced.
374+ if ( ! sourceLength && ! text && ! embeddedAuthUrls . length ) return null
345375 return {
346376 url : scriptUrl ,
347377 commentCount : comments . length + ( sidecarText ? 1 : 0 ) ,
348378 text,
349- sidecarUrl : sidecarText ? sidecarUrl : undefined
379+ sidecarUrl : sidecarText ? sidecarUrl : undefined ,
380+ embeddedAuthUrls : embeddedAuthUrls . length ? embeddedAuthUrls : undefined ,
381+ sourceLength : sourceLength || undefined
350382 }
351383}
352384
@@ -373,12 +405,48 @@ const detectTechnologiesFromLicenseText = (observations: ScriptLicenseObservatio
373405 return mergeTechnologyRecords ( technologies ) . slice ( 0 , 80 )
374406}
375407
408+ // Runs the third-party login rules over the OAuth URLs extracted from bundles:
409+ // when an SPA hard-codes the "Sign in with linux.do / GitHub / Google" callback URL in its bundle,
410+ // the HTML / resource URLs / response headers do not show it, so the bundle body is scanned for URLs which are then rule-matched.
410+ // Returns the merged technology records, capped at 40; returns [] when there are no rules or no observations.
411+ const detectAuthProvidersFromBundles = ( observations : ScriptLicenseObservation [ ] , rules : any [ ] ) : any [ ] => {
412+ if ( ! Array . isArray ( rules ) || ! rules . length || ! observations . length ) return [ ]
413+
414+ const technologies : any [ ] = [ ]
415+ for ( const observation of observations ) {
416+ if ( ! observation . embeddedAuthUrls ?. length ) continue
417+ const urlBlob = observation . embeddedAuthUrls . join ( '\n' )
418+ for ( const rule of rules ) {
419+ if ( ! rule ?. name ) continue
420+ if ( ! matchesCompiledRulePatterns ( rule , urlBlob ) ) continue
420+ // Prefer the individual URL that matches the rule as evidence; fall back to the first URL when none matches on its own
421+ const matched = observation . embeddedAuthUrls . find ( u => matchesCompiledRulePatterns ( rule , u ) ) || observation . embeddedAuthUrls [ 0 ]
422+ const kindPrefix = rule . kind ? `${ rule . kind } :` : ''
423+ technologies . push ( {
424+ category : rule . category || '第三方登录 / OAuth' ,
425+ name : rule . name ,
426+ confidence : rule . confidence || '高' ,
427+ evidence : [ `${ kindPrefix } JS 包体内嵌 OAuth 入口 ${ shortHeaderUrl ( matched ) } ` ] ,
428+ source : BUNDLE_LICENSE_SOURCE ,
429+ url : cleanTechnologyUrl ( rule . url )
430+ } )
431+ }
432+ }
433+
434+ return mergeTechnologyRecords ( technologies ) . slice ( 0 , 40 )
435+ }
436+
/**
 * Persists the bundle scan result for a tab and refreshes its badge.
 *
 * Does nothing when the tab's URL is not a detectable page. Otherwise the write
 * runs inside `withTabWriteLock` for the tab (presumably serialising writers per
 * tab — confirm against './tab-write-lock').
 *
 * @param tabId Tab whose stored data is updated.
 * @param data Scan result; only `bundle` and `updatedAt` are read. Nullish
 *   `data` is tolerated, as it was before the write lock was introduced.
 * @param settings Detector settings passed through to `buildPopupCacheRecord`.
 * @param tab Tab object; `tab.url` gates the write.
 */
const saveBundleLicenseDataAndBadge = async (tabId: number, data: any, settings: any, tab: any) => {
  if (!isDetectablePageUrl(tab?.url)) return
  // Per-tab write lock: a bundle scan takes 1-2s, during which detection / dynamic / headers
  // may write concurrently. Once inside the lock, re-read the latest stored data and overwrite
  // only the bundle fields so every other field keeps its newest value.
  await withTabWriteLock(tabId, async () => {
    const latest = (await getTabData(tabId)) || {}
    // Optional chaining: a nullish `data` must not throw here
    latest.bundle = data?.bundle
    latest.updatedAt = data?.updatedAt || Date.now()
    const popup = await buildPopupCacheRecord(latest, settings, tab)
    // Drop any legacy `popup` field from the stored record; the popup cache is passed separately
    const { popup: _legacyPopup, ...tabData } = latest
    await writeTabData(tabId, tabData, popup)
    await updateBadgeForTab(tabId, popup)
  })
}
383451
384452export const runBundleLicenseDetection = async ( tabId : number ) : Promise < void > => {
@@ -395,15 +463,26 @@ export const runBundleLicenseDetection = async (tabId: number): Promise<void> =>
395463 if ( data . bundle ?. schemaVersion === BUNDLE_LICENSE_SCHEMA_VERSION && data . bundle ?. signature === signature ) return
396464
397465 const budget = createScanBudget ( )
466+ // 并发扫候选脚本,但限制同时 fetch 数为 3:完全并行会一次性占 5 条网络连接,
467+ // 跟页面自身资源抢带宽;3 个一组既能压住扫描总耗时,又不挤占用户的页面加载。
468+ const SCRIPT_SCAN_CONCURRENCY = 3
398469 const observations : ScriptLicenseObservation [ ] = [ ]
399- for ( const script of scripts ) {
470+ for ( let i = 0 ; i < scripts . length ; i += SCRIPT_SCAN_CONCURRENCY ) {
400471 if ( ! hasScanBudget ( budget ) ) break
401- const observation = await scanScriptLicense ( script , budget )
402- if ( observation ) observations . push ( observation )
472+ const batch = scripts . slice ( i , i + SCRIPT_SCAN_CONCURRENCY )
473+ const batchResults = await Promise . all (
474+ batch . map ( script => ( hasScanBudget ( budget ) ? scanScriptLicense ( script , budget ) : Promise . resolve ( null ) ) )
475+ )
476+ for ( const observation of batchResults ) {
477+ if ( observation ) observations . push ( observation )
478+ }
403479 await yieldToEventLoop ( )
404480 }
405481
406- const technologies = detectTechnologiesFromLicenseText ( observations , pageRules . bundleLicenseLibraries || [ ] )
482+ const technologies = mergeTechnologyRecords ( [
483+ ...detectTechnologiesFromLicenseText ( observations , pageRules . bundleLicenseLibraries || [ ] ) ,
484+ ...detectAuthProvidersFromBundles ( observations , pageRules . thirdPartyLogins || [ ] )
485+ ] )
407486 const pageIdentity = getBundlePageIdentity ( data , tab )
408487
409488 data . bundle = {
@@ -415,7 +494,9 @@ export const runBundleLicenseDetection = async (tabId: number): Promise<void> =>
415494 scripts : observations . map ( observation => ( {
416495 url : observation . url ,
417496 sidecarUrl : observation . sidecarUrl || '' ,
418- commentCount : observation . commentCount
497+ commentCount : observation . commentCount ,
498+ sourceLength : observation . sourceLength || 0 ,
499+ embeddedAuthUrls : observation . embeddedAuthUrls || [ ]
419500 } ) ) ,
420501 technologies
421502 }
0 commit comments