Skip to content

Commit 9805589

Browse files
committed
feat: 优化版权注释采样检测
1 parent 772310e commit 9805589

4 files changed

Lines changed: 188 additions & 16 deletions

File tree

.prettierignore

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
node_modules/
2+
dist/
3+
build/
4+
release/
5+
releases/
6+
artifacts/
7+
public/injected/
8+
docs/.vitepress/cache/
9+
docs/.vitepress/dist/
10+
coverage/
11+
test-results/
12+
playwright-report/
13+
test/
14+
*.crx
15+
*.zip
16+
*.pem

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "stackprism",
33
"private": true,
4-
"version": "1.2.5",
4+
"version": "1.2.6",
55
"type": "module",
66
"description": "StackPrism 用于检测网页前端、后端、CDN、SaaS、广告营销、统计、登录、支付、网站程序和主题模板线索。",
77
"scripts": {

public/rules/page/bundle-license-libraries.json

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,11 @@
361361
"url": "https://www.i18next.com",
362362
"patterns": ["\\bi18next\\b|i18next\\.com"]
363363
},
364+
{
365+
"name": "Vue I18n",
366+
"url": "https://vue-i18n.intlify.dev",
367+
"patterns": ["vue-i18n\\s+v?\\d|\\bvue-i18n\\b|vue-i18n\\.intlify\\.dev"]
368+
},
364369
{
365370
"name": "react-i18next",
366371
"url": "https://react.i18next.com",
@@ -386,6 +391,16 @@
386391
"url": "https://github.com/cure53/DOMPurify",
387392
"patterns": ["DOMPurify|cure53/DOMPurify"]
388393
},
394+
{
395+
"name": "js-yaml",
396+
"url": "https://github.com/nodeca/js-yaml",
397+
"patterns": ["js-yaml(?:/dist/js-yaml\\.mjs)?|nodeca/js-yaml"]
398+
},
399+
{
400+
"name": "qrcode.react",
401+
"url": "https://github.com/zpao/qrcode.react",
402+
"patterns": ["qrcode\\.react|zpao/qrcode\\.react"]
403+
},
389404
{
390405
"name": "Marked",
391406
"url": "https://marked.js.org",
@@ -441,6 +456,35 @@
441456
"url": "https://ricostacruz.com/nprogress",
442457
"patterns": ["\\bNProgress\\b|nprogress(?:\\.min)?\\.js"]
443458
},
459+
{
460+
"name": "PullToRefresh.js",
461+
"url": "https://github.com/BoxFactura/pulltorefresh.js",
462+
"patterns": ["pulltorefreshjs|pulltorefresh\\.js|BoxFactura/pulltorefresh\\.js"]
463+
},
464+
{
465+
"category": "UI / CSS 框架",
466+
"name": "Animate.css",
467+
"url": "https://animate.style",
468+
"patterns": ["animate\\.css|animate\\.style"]
469+
},
470+
{
471+
"category": "构建与运行时",
472+
"name": "regenerator-runtime",
473+
"url": "https://github.com/facebook/regenerator",
474+
"patterns": ["regenerator-runtime|facebook/regenerator|packages/babel-helpers/LICENSE"]
475+
},
476+
{
477+
"category": "构建与运行时",
478+
"name": "buffer",
479+
"url": "https://github.com/feross/buffer",
480+
"patterns": ["The buffer module from node\\.js, for the browser|feross/buffer"]
481+
},
482+
{
483+
"category": "构建与运行时",
484+
"name": "ieee754",
485+
"url": "https://github.com/feross/ieee754",
486+
"patterns": ["ieee754\\. BSD-3-Clause|feross/ieee754|\\bieee754\\b[\\s\\S]{0,80}Feross"]
487+
},
444488
{
445489
"name": "XState",
446490
"url": "https://xstate.js.org",
@@ -648,6 +692,11 @@
648692
"url": "https://roughjs.com",
649693
"patterns": ["Rough\\.js|roughjs|roughjs\\.com"]
650694
},
695+
{
696+
"name": "@kurkle/color",
697+
"url": "https://github.com/kurkle/color",
698+
"patterns": ["@kurkle/color|kurkle/color"]
699+
},
651700
{
652701
"name": "P5.js",
653702
"url": "https://p5js.org",

src/background/bundle-license.ts

Lines changed: 122 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,21 @@ import { matchesCompiledRulePatterns, matchesRuleTextHints, passesRulePrefilter
66
import { isDetectablePageUrl } from '@/utils/page-support'
77
import { cleanTechnologyUrl } from '@/utils/url'
88

9-
// Version of the stored bundle-license result; a stored result with a different
// version is not treated as up to date and the scan runs again.
const BUNDLE_LICENSE_SCHEMA_VERSION = 2
// Source label for this detector (usage is outside the visible part of the file).
const BUNDLE_LICENSE_SOURCE = 'JS 版权注释'
// NOTE(review): presumably caps how many script URLs are scanned per page — the use site is not visible here.
const MAX_CANDIDATE_SCRIPTS = 5
// Size of the initial "head" request for every script.
const MAX_FETCH_BYTES = 384 * 1024
// Size of each additional range sample taken further into a large script.
const MAX_RANGE_SAMPLE_BYTES = 160 * 1024
// Total number of bytes one scan may request across all scripts and sidecars.
const MAX_TOTAL_SAMPLE_BYTES = 2 * 1024 * 1024
// Requests smaller than this are not worth issuing; the budget refuses them.
const MIN_SAMPLE_BYTES = 24 * 1024
// Upper bound on range samples derived for a single script.
const MAX_RANGE_SAMPLES_PER_SCRIPT = 6
// Upper bound on range samples issued during one whole scan.
const MAX_RANGE_SAMPLES_PER_SCAN = 10
// Size limit for a sidecar license file request.
const MAX_SIDECAR_BYTES = 160 * 1024
// Stop collecting license comments once roughly this many characters were gathered.
const MAX_LICENSE_TEXT_CHARS = 180_000
// Per-request timeout; further capped by the time left until the scan deadline.
const FETCH_TIMEOUT_MS = 6000
// Wall-clock limit for one scan, measured from budget creation.
const MAX_SCAN_MS = 8000
// NOTE(review): presumably the delay before a scheduled scan starts — the use site is not visible here.
const SCAN_DELAY_MS = 1400
// Fractions of the last possible sample offset at which range samples are attempted; 1 means the file tail.
const RANGE_SAMPLE_RATIOS = [0.25, 0.5, 0.8, 0.835, 0.9, 1] as const
1724

1825
const bundleLicenseTimers = new Map<number, ReturnType<typeof setTimeout>>()
1926

@@ -29,8 +36,42 @@ type ScriptLicenseObservation = {
2936
sidecarUrl?: string
3037
}
3138

39+
/** Outcome of one ranged fetch of a script or sidecar file. */
type RangeFetchResult = {
  /** True only when the server answered with HTTP 206 (partial content). */
  rangeSupported: boolean
  /** Decoded text that was read; empty when nothing usable was fetched. */
  text: string
  /** Full resource size parsed from the `Content-Range` header, when present. */
  totalBytes?: number
}
44+
45+
/** Mutable limits shared by every request made during one scan. */
type ScanBudget = {
  /** Epoch milliseconds after which no further work is started. */
  deadline: number
  /** Bytes that may still be requested. */
  remainingBytes: number
  /** Range samples (beyond the head request) that may still be issued. */
  remainingRangeSamples: number
}
50+
3251
/** Drops empty strings and removes duplicates, keeping first-occurrence order. */
const unique = (items: string[]) => {
  const seen = new Set<string>()
  for (const item of items) {
    if (item) seen.add(item)
  }
  return Array.from(seen)
}
3352

53+
/**
 * Creates a fresh budget for one scan: the deadline starts counting now, and
 * the byte and range-sample allowances start at their configured maximums.
 */
const createScanBudget = (): ScanBudget => ({
  deadline: Date.now() + MAX_SCAN_MS,
  remainingBytes: MAX_TOTAL_SAMPLE_BYTES,
  remainingRangeSamples: MAX_RANGE_SAMPLES_PER_SCAN
})
58+
59+
/**
 * Reports whether the scan may start another request: at least one
 * minimum-size sample of bytes is left and the deadline has not passed.
 */
const hasScanBudget = (budget: ScanBudget): boolean => {
  const enoughBytes = budget.remainingBytes >= MIN_SAMPLE_BYTES
  if (!enoughBytes) return false
  return Date.now() < budget.deadline
}
60+
61+
/**
 * Reserves bytes from the scan budget for one request.
 *
 * @param budget Shared scan budget; `remainingBytes` is reduced by the amount granted.
 * @param maxBytes Largest number of bytes the caller wants to request.
 * @returns The number of bytes granted, or 0 when the budget is exhausted,
 *   the deadline has passed, or less than `MIN_SAMPLE_BYTES` could be granted.
 */
const claimFetchBytes = (budget: ScanBudget, maxBytes: number): number => {
  if (!hasScanBudget(budget)) return 0
  // A NaN request would slip through the minimum-size check below and turn
  // `remainingBytes` into NaN, silently ending the whole scan.
  if (Number.isNaN(maxBytes)) return 0
  const bytes = Math.min(Math.max(1, Math.floor(maxBytes)), budget.remainingBytes)
  if (bytes < MIN_SAMPLE_BYTES) return 0
  budget.remainingBytes -= bytes
  return bytes
}
68+
69+
/**
 * Timeout for the next request: the time left until the scan deadline,
 * capped at `FETCH_TIMEOUT_MS` and never below 1 ms.
 */
const remainingTimeoutMs = (budget: ScanBudget): number => {
  const untilDeadline = budget.deadline - Date.now()
  const capped = Math.min(FETCH_TIMEOUT_MS, untilDeadline)
  return Math.max(1, capped)
}
70+
71+
/** Resolves on a zero-delay timer so other queued tasks can run between scan steps. */
const yieldToEventLoop = async (): Promise<void> => {
  await new Promise<void>(resolve => setTimeout(resolve, 0))
}
74+
3475
const toAbsoluteHttpUrl = (value: unknown, baseUrl: string): string => {
3576
const text = String(value || '').trim()
3677
if (!text) return ''
@@ -136,27 +177,81 @@ const isTextLikeResponse = (url: string, response: Response): boolean => {
136177
return /javascript|ecmascript|text|plain|octet-stream/i.test(contentType)
137178
}
138179

139-
const fetchLimitedText = async (url: string, maxBytes: number): Promise<string> => {
180+
/**
 * Extracts the total resource size from a `Content-Range` header value,
 * i.e. the digits after the final `/` (as in `bytes 0-99/1234`).
 *
 * @param value Raw header value, or null when the header is absent.
 * @returns The total size, or undefined when the header is missing, the size
 *   is not given as digits (e.g. `*`), or it is not a positive finite number.
 */
const parseContentRangeTotal = (value: string | null): number | undefined => {
  const match = value?.match(/\/(\d+)\s*$/)
  if (!match) return undefined
  const total = Number(match[1])
  return Number.isFinite(total) && total > 0 ? total : undefined
}
186+
187+
/**
 * Fetches one byte window of a text resource with an HTTP `Range` request.
 *
 * @param url Resource to fetch.
 * @param start Byte offset of the window; negative or fractional values are clamped/floored.
 * @param maxBytes Desired window size; the budget may grant less.
 * @param budget Shared scan budget; bytes are reserved before the request is sent.
 * @returns The text read plus whether the server honoured the range (HTTP 206)
 *   and the total size from `Content-Range`. Every failure (no budget, network
 *   error, timeout, non-OK status, non-text response) yields
 *   `{ rangeSupported: false, text: '' }` instead of throwing.
 */
const fetchTextRange = async (url: string, start: number, maxBytes: number, budget: ScanBudget): Promise<RangeFetchResult> => {
  // Reserve bytes first so an exhausted or expired scan never touches the network.
  const claimedBytes = claimFetchBytes(budget, maxBytes)
  if (!claimedBytes) return { rangeSupported: false, text: '' }

  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), remainingTimeoutMs(budget))
  const safeStart = Math.max(0, Math.floor(start))
  const safeMaxBytes = Math.max(1, Math.floor(claimedBytes))
  // Range end offsets are inclusive.
  const end = safeStart + safeMaxBytes - 1

  try {
    const response = await fetch(url, {
      cache: 'force-cache',
      credentials: 'omit',
      headers: { Range: `bytes=${safeStart}-${end}` },
      signal: controller.signal
    })
    if (!response.ok) return { rangeSupported: false, text: '' }
    if (!isTextLikeResponse(url, response)) return { rangeSupported: false, text: '' }

    const rangeSupported = response.status === 206
    if (!rangeSupported && safeStart > 0) {
      // The server ignored the Range header, so the body starts at byte 0 rather
      // than at the requested offset. Returning it would pass off the file head
      // as this window; stop the download and report nothing instead.
      controller.abort()
      return { rangeSupported: false, text: '' }
    }

    const totalBytes = parseContentRangeTotal(response.headers.get('content-range'))
    return {
      rangeSupported,
      text: await readLimitedResponseText(response, safeMaxBytes),
      totalBytes
    }
  } catch {
    // Best effort: network errors and aborts count as "nothing found".
    return { rangeSupported: false, text: '' }
  } finally {
    clearTimeout(timeout)
  }
}
159220

221+
/**
 * Fetches at most `maxBytes` bytes from the start of `url` and returns the text,
 * or an empty string when the fetch fails or the scan budget is exhausted.
 */
const fetchLimitedText = async (url: string, maxBytes: number, budget: ScanBudget): Promise<string> =>
  (await fetchTextRange(url, 0, maxBytes, budget)).text
223+
224+
/**
 * Chooses byte offsets for extra samples inside a script that is larger than
 * the head request.
 *
 * @param totalBytes Full size of the script in bytes.
 * @returns Ascending start offsets, at most `MAX_RANGE_SAMPLES_PER_SCRIPT` of
 *   them; empty when the size is not finite or the file is no larger than the
 *   head request plus one sample window.
 */
const buildRangeSampleStarts = (totalBytes: number): number[] => {
  if (!Number.isFinite(totalBytes) || totalBytes <= MAX_FETCH_BYTES + MAX_RANGE_SAMPLE_BYTES) return []

  // Last offset from which a full sample window still fits in the file.
  const maxStart = Math.max(0, totalBytes - MAX_RANGE_SAMPLE_BYTES)
  const starts: number[] = []
  for (const ratio of RANGE_SAMPLE_RATIOS) {
    const start = ratio >= 1 ? maxStart : Math.floor(maxStart * ratio)
    // Skip offsets that begin inside the region the head request already covers.
    if (start <= MAX_FETCH_BYTES) continue
    // Skip offsets closer than half a window to one already chosen.
    if (starts.some(item => Math.abs(item - start) < MAX_RANGE_SAMPLE_BYTES / 2)) continue
    starts.push(start)
  }

  return starts.sort((a, b) => a - b).slice(0, MAX_RANGE_SAMPLES_PER_SCRIPT)
}
238+
239+
/**
 * Fetches the head of a script and, when the server supports range requests
 * and reports the total size, additional windows further into the file.
 *
 * @param url Script URL.
 * @param budget Shared scan budget; consumed by every request issued here.
 * @returns The fetched pieces joined with newlines (head first, then samples
 *   in ascending offset order). Only the head text (possibly empty) is
 *   returned when sampling is not possible.
 */
const fetchSampledScriptText = async (url: string, budget: ScanBudget): Promise<string> => {
  const head = await fetchTextRange(url, 0, MAX_FETCH_BYTES, budget)
  const chunks = [head.text]
  // Without a 206 reply and a known total size there is nothing to sample.
  if (!head.rangeSupported || !head.totalBytes) return chunks.join('\n')

  for (const start of buildRangeSampleStarts(head.totalBytes)) {
    if (!hasScanBudget(budget) || budget.remainingRangeSamples <= 0) break
    // The sample slot is spent even if the request below fails.
    budget.remainingRangeSamples -= 1
    await yieldToEventLoop()
    const result = await fetchTextRange(url, start, MAX_RANGE_SAMPLE_BYTES, budget)
    if (result.text) chunks.push(result.text)
  }

  return chunks.join('\n')
}
254+
160255
/**
 * Decides whether a comment looks like a license notice: it either starts
 * with the `/*!` marker or mentions a license/copyright keyword
 * (case-insensitive).
 */
const isLicenseComment = (comment: string): boolean =>
  /^\/\*!/.test(comment) || /@(?:license|preserve)|copyright|licensed under|license information/i.test(comment)
162257

@@ -167,22 +262,25 @@ const trimLicenseText = (text: string): string => {
167262

168263
const extractLicenseComments = (source: string): string[] => {
169264
const comments: string[] = []
265+
let commentChars = 0
170266
const blockCommentPattern = /\/\*[\s\S]*?\*\//g
171267
let blockMatch: RegExpExecArray | null
172268

173269
while ((blockMatch = blockCommentPattern.exec(source))) {
174270
const comment = blockMatch[0]
175271
if (isLicenseComment(comment)) {
176272
comments.push(comment)
273+
commentChars += comment.length + 1
177274
}
178-
if (comments.join('\n').length >= MAX_LICENSE_TEXT_CHARS) break
275+
if (commentChars >= MAX_LICENSE_TEXT_CHARS) break
179276
}
180277

181278
const lineCommentPattern = /^\s*\/\/[^\n]*(?:@license|@preserve|copyright|license)[^\n]*(?:\n\s*\/\/[^\n]*){0,8}/gim
182279
let lineMatch: RegExpExecArray | null
183280
while ((lineMatch = lineCommentPattern.exec(source))) {
184281
comments.push(lineMatch[0])
185-
if (comments.join('\n').length >= MAX_LICENSE_TEXT_CHARS) break
282+
commentChars += lineMatch[0].length + 1
283+
if (commentChars >= MAX_LICENSE_TEXT_CHARS) break
186284
}
187285

188286
return comments
@@ -200,11 +298,12 @@ const buildSidecarLicenseUrl = (scriptUrl: string): string => {
200298
}
201299
}
202300

203-
const scanScriptLicense = async (scriptUrl: string): Promise<ScriptLicenseObservation | null> => {
204-
const source = await fetchLimitedText(scriptUrl, MAX_FETCH_BYTES)
205-
const comments = source ? extractLicenseComments(source) : []
301+
const scanScriptLicense = async (scriptUrl: string, budget: ScanBudget): Promise<ScriptLicenseObservation | null> => {
302+
const source = await fetchSampledScriptText(scriptUrl, budget)
303+
const comments = unique(source ? extractLicenseComments(source) : [])
206304
const sidecarUrl = buildSidecarLicenseUrl(scriptUrl)
207-
const sidecarText = sidecarUrl ? await fetchLimitedText(sidecarUrl, MAX_SIDECAR_BYTES) : ''
305+
const sidecarText =
306+
sidecarUrl && comments.length < 12 && hasScanBudget(budget) ? await fetchLimitedText(sidecarUrl, MAX_SIDECAR_BYTES, budget) : ''
208307
const text = trimLicenseText([...comments, sidecarText].filter(Boolean).join('\n\n'))
209308

210309
if (!text) return null
@@ -260,7 +359,15 @@ export const runBundleLicenseDetection = async (tabId: number): Promise<void> =>
260359
if (!signature) return
261360
if (data.bundle?.schemaVersion === BUNDLE_LICENSE_SCHEMA_VERSION && data.bundle?.signature === signature) return
262361

263-
const observations = (await Promise.all(scripts.map(script => scanScriptLicense(script)))).filter(Boolean) as ScriptLicenseObservation[]
362+
const budget = createScanBudget()
363+
const observations: ScriptLicenseObservation[] = []
364+
for (const script of scripts) {
365+
if (!hasScanBudget(budget)) break
366+
const observation = await scanScriptLicense(script, budget)
367+
if (observation) observations.push(observation)
368+
await yieldToEventLoop()
369+
}
370+
264371
const technologies = detectTechnologiesFromLicenseText(observations, pageRules.bundleLicenseLibraries || [])
265372

266373
data.bundle = {

0 commit comments

Comments
 (0)