Skip to content

Commit 4b882f2

Browse files
committed
fix: 修复版权注释规则预过滤漏检
1 parent 9805589 commit 4b882f2

8 files changed

Lines changed: 46 additions & 24 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "stackprism",
33
"private": true,
4-
"version": "1.2.6",
4+
"version": "1.2.7",
55
"type": "module",
66
"description": "StackPrism 用于检测网页前端、后端、CDN、SaaS、广告营销、统计、登录、支付、网站程序和主题模板线索。",
77
"scripts": {

public/rules/page/ai-platforms-assets.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,15 @@
209209
"(?:^|[\\/._-])(?:new-api|one-api)(?:[\\/._-]|[^\\s\"'<>]*\\.(?:js|css|svg|png|webp)(?:\\?|$))"
210210
]
211211
},
212+
{
213+
"name": "Sub2API",
214+
"patterns": [
215+
"<title>[^<]*-\\s*(?:AI\\s*)?API Gateway\\s*</title>",
216+
"\"site_subtitle\"\\s*:\\s*\"Subscription to API Conversion Platform\"",
217+
"window\\.__APP_CONFIG__\\s*=\\s*\\{[\\s\\S]{0,1200}(?:hide_ccs_import_button|purchase_subscription_enabled|linuxdo_oauth_enabled|custom_endpoints)",
218+
"(?:^|[\\/._-])sub2api(?:[\\/._-]|[^\\s\"'<>]*\\.(?:js|css|svg|png|webp)(?:\\?|$))"
219+
]
220+
},
212221
{
213222
"name": "VoAPI",
214223
"patterns": [

public/rules/page/bundle-license-libraries.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@
444444
{
445445
"name": "SortableJS",
446446
"url": "https://sortablejs.github.io/Sortable",
447-
"patterns": ["SortableJS|sortablejs|Sortable\\.create"]
447+
"patterns": ["SortableJS|sortablejs|Sortable\\s+v?\\d+\\.\\d+|Sortable\\.create"]
448448
},
449449
{
450450
"name": "Hammer.js",

public/tech-links.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@
495495
"HoneyHive": "https://www.honeyhive.ai",
496496
"Laminar": "https://www.lmnr.ai",
497497
"New API / One API": "https://github.com/Calcium-Ion/new-api",
498+
"Sub2API": "https://github.com/Wei-Shaw/sub2api",
498499
"VoAPI": "https://github.com/VoAPI/VoAPI",
499500
"Veloera": "https://github.com/Veloera/Veloera",
500501
"Botpress": "https://botpress.com",

src/background/bundle-license.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@ import { buildPopupCacheRecord } from './popup-cache'
22
import { buildEffectivePageRules, loadDetectorSettings, loadTechRules } from './detector-settings'
33
import { mergeTechnologyRecords, shortHeaderUrl } from './merge'
44
import { getTabData, getTabSnapshot, updateBadgeForTab, writeTabData } from './tab-store'
5-
import { matchesCompiledRulePatterns, matchesRuleTextHints, passesRulePrefilter } from './rule-matcher'
5+
import { matchesCompiledRulePatterns, matchesRuleTextHints } from './rule-matcher'
66
import { isDetectablePageUrl } from '@/utils/page-support'
77
import { cleanTechnologyUrl } from '@/utils/url'
88

9-
const BUNDLE_LICENSE_SCHEMA_VERSION = 2
9+
const BUNDLE_LICENSE_SCHEMA_VERSION = 3
1010
const BUNDLE_LICENSE_SOURCE = 'JS 版权注释'
1111
const MAX_CANDIDATE_SCRIPTS = 5
1212
const MAX_FETCH_BYTES = 384 * 1024
@@ -260,6 +260,8 @@ const trimLicenseText = (text: string): string => {
260260
return text.slice(0, MAX_LICENSE_TEXT_CHARS)
261261
}
262262

263+
/**
 * Reports whether `text` starts like an HTML document rather than plain license text.
 *
 * Leading whitespace and any number of leading HTML comments are skipped, then the
 * remainder must begin with `<!doctype html` or an `<html` tag (case-insensitive).
 * Comments are skipped with a linear scan instead of a nested-quantifier regex so that
 * fetched content cannot trigger pathological backtracking.
 *
 * @param text Response body to classify.
 * @returns `true` when the body opens with an HTML doctype or `<html>` tag; `false`
 *   otherwise, including when a leading comment is never closed.
 */
const looksLikeHtmlDocument = (text: string): boolean => {
  const whitespace = /\s*/y
  const documentStart = /<!doctype\s+html|<html[\s>]/iy
  let pos = 0
  for (;;) {
    // Sticky match always succeeds (possibly empty) and leaves lastIndex after the whitespace run.
    whitespace.lastIndex = pos
    whitespace.test(text)
    pos = whitespace.lastIndex
    if (!text.startsWith('<!--', pos)) break
    const end = text.indexOf('-->', pos + 4)
    if (end === -1) return false
    pos = end + 3
  }
  documentStart.lastIndex = pos
  return documentStart.test(text)
}
264+
263265
const extractLicenseComments = (source: string): string[] => {
264266
const comments: string[] = []
265267
let commentChars = 0
@@ -298,12 +300,17 @@ const buildSidecarLicenseUrl = (scriptUrl: string): string => {
298300
}
299301
}
300302

303+
/**
 * Fetches the sidecar license file at `sidecarUrl` through fetchLimitedText, capped at
 * MAX_SIDECAR_BYTES and charged against `budget`.
 *
 * @returns The fetched text, or '' when nothing was fetched or the body looks like an
 *   HTML document (presumably a fallback/error page served in place of the license
 *   file — TODO confirm).
 */
const fetchSidecarLicenseText = async (sidecarUrl: string, budget: ScanBudget): Promise<string> => {
304+
const text = await fetchLimitedText(sidecarUrl, MAX_SIDECAR_BYTES, budget)
305+
if (!text || looksLikeHtmlDocument(text)) return ''
306+
return text
307+
}
308+
301309
const scanScriptLicense = async (scriptUrl: string, budget: ScanBudget): Promise<ScriptLicenseObservation | null> => {
302310
const source = await fetchSampledScriptText(scriptUrl, budget)
303311
const comments = unique(source ? extractLicenseComments(source) : [])
304312
const sidecarUrl = buildSidecarLicenseUrl(scriptUrl)
305-
const sidecarText =
306-
sidecarUrl && comments.length < 12 && hasScanBudget(budget) ? await fetchLimitedText(sidecarUrl, MAX_SIDECAR_BYTES, budget) : ''
313+
const sidecarText = sidecarUrl && comments.length < 12 && hasScanBudget(budget) ? await fetchSidecarLicenseText(sidecarUrl, budget) : ''
307314
const text = trimLicenseText([...comments, sidecarText].filter(Boolean).join('\n\n'))
308315

309316
if (!text) return null
@@ -322,7 +329,7 @@ const detectTechnologiesFromLicenseText = (observations: ScriptLicenseObservatio
322329
for (const observation of observations) {
323330
const lowerText = observation.text.toLowerCase()
324331
for (const rule of rules) {
325-
if (!rule?.name || !passesRulePrefilter(rule, lowerText) || !matchesRuleTextHints(rule, lowerText)) continue
332+
if (!rule?.name || !matchesRuleTextHints(rule, lowerText)) continue
326333
if (!matchesCompiledRulePatterns(rule, observation.text)) continue
327334
technologies.push({
328335
category: rule.category || '前端库',

src/background/rule-matcher.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ export const getCompiledCombinedPattern = (rule: any, patterns: unknown): RegExp
8181
const HINT_MIN_LEN = 4
8282
const HINT_MAX_COUNT = 3
8383
const REGEX_LITERAL_SPLIT = /[\\^$.|?*+()[\]{}]/
84+
// Matches the regex class/boundary escapes \b \B \d \D \s \S \w \W.
// extractHintCandidates replaces each match with a space before splitting on
// REGEX_LITERAL_SPLIT; splitting on the backslash alone would leave the escape letter
// glued to the next literal (`\bfoo` would yield the bogus hint `bfoo`).
// NOTE(review): because the match becomes a space rather than a split point, a pattern
// such as `foo\dbar` appears to yield the hint `foo bar`, which matching text need not
// contain — confirm against how hints are consumed.
const REGEX_CONTROL_ESCAPE = /\\[bBdDsSwW]/g
85+
86+
/**
 * Canonicalizes a hint candidate: lower-cases it, collapses every whitespace run
 * to a single space, and strips leading/trailing whitespace.
 */
const normalizeHintCandidate = (value: string): string => {
  const lowered = value.toLowerCase()
  const collapsed = lowered.replace(/\s+/g, ' ')
  return collapsed.trim()
}
8487

8588
const extractHintCandidates = (rule: any): string[] => {
8689
const patterns = Array.isArray(rule?.patterns) ? rule.patterns : []
@@ -96,12 +99,10 @@ const extractHintCandidates = (rule: any): string[] => {
9699
if (lower.length >= HINT_MIN_LEN) candidates.push(lower)
97100
continue
98101
}
99-
let longest = ''
100-
for (const segment of text.split(REGEX_LITERAL_SPLIT)) {
101-
const lower = segment.toLowerCase()
102-
if (lower.length > longest.length) longest = lower
102+
for (const segment of text.replace(REGEX_CONTROL_ESCAPE, ' ').split(REGEX_LITERAL_SPLIT)) {
103+
const lower = normalizeHintCandidate(segment)
104+
if (lower.length >= HINT_MIN_LEN) candidates.push(lower)
103105
}
104-
if (longest.length >= HINT_MIN_LEN) candidates.push(longest)
105106
}
106107
return candidates
107108
}

src/injected/page-detector.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -922,20 +922,23 @@ ${html}`
922922
const patterns = rule.patterns || []
923923
const isKeyword = rule.matchType === 'keyword'
924924
const candidates = []
925+
// Normalizes a hint candidate: coerces the value to a string (falsy values become ''),
// lower-cases it, collapses each whitespace run to a single space, and trims the ends.
const normalizeHintCandidate = value =>
926+
String(value || '')
927+
.toLowerCase()
928+
.replace(/\s+/g, ' ')
929+
.trim()
925930
for (const pattern of patterns) {
926931
const text = String(pattern || '')
927932
if (!text) continue
928933
if (isKeyword) {
929-
const lower = text.toLowerCase().trim()
934+
const lower = normalizeHintCandidate(text)
930935
if (lower.length >= 4) candidates.push(lower)
931936
continue
932937
}
933-
let longest = ''
934-
for (const segment of text.split(/[\\^$.|?*+()[\]{}]/)) {
935-
const lowerSeg = segment.toLowerCase()
936-
if (lowerSeg.length > longest.length) longest = lowerSeg
938+
for (const segment of text.replace(/\\[bBdDsSwW]/g, ' ').split(/[\\^$.|?*+()[\]{}]/)) {
939+
const lowerSeg = normalizeHintCandidate(segment)
940+
if (lowerSeg.length >= 4) candidates.push(lowerSeg)
937941
}
938-
if (longest.length >= 4) candidates.push(longest)
939942
}
940943
const unique = [...new Set(candidates)].sort((a, b) => b.length - a.length).slice(0, 3)
941944
ruleHintCache.set(rule, unique)

vite.config.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@ import manifest from './src/manifest.config'
99
const HINT_MIN_LEN = 4
1010
const HINT_MAX_COUNT = 3
1111
const REGEX_LITERAL_SPLIT = /[\\^$.|?*+()[\]{}]/
12+
// Matches the regex class/boundary escapes \b \B \d \D \s \S \w \W.
// extractRuleHints replaces each match with a space before splitting on
// REGEX_LITERAL_SPLIT; splitting on the backslash alone would leave the escape letter
// glued to the next literal (`\bfoo` would yield the bogus hint `bfoo`).
// NOTE(review): because the match becomes a space rather than a split point, a pattern
// such as `foo\dbar` appears to yield the hint `foo bar`, which matching text need not
// contain — confirm against how hints are consumed.
const REGEX_CONTROL_ESCAPE = /\\[bBdDsSwW]/g
1213
const REGEX_ESCAPE = /[.*+?^${}()|[\]\\]/g
1314

1415
/**
 * Returns `value` with every character matched by REGEX_ESCAPE prefixed by a
 * backslash, so the result can be embedded in a regex source as a literal.
 */
const escapeForRegex = (value: string) => {
  const escaped = value.replace(REGEX_ESCAPE, '\\$&')
  return escaped
}
1516

17+
/**
 * Produces the canonical form of a hint candidate: lower-case, with each run of
 * whitespace reduced to one space and no leading or trailing whitespace.
 */
const normalizeHintCandidate = (value: string): string => {
  const singleSpaced = value.toLowerCase().replace(/\s+/g, ' ')
  return singleSpaced.trim()
}
18+
1619
const extractRuleHints = (patterns: unknown, isKeyword: boolean): string[] => {
1720
if (!Array.isArray(patterns) || !patterns.length) return []
1821
const candidates: string[] = []
@@ -24,12 +27,10 @@ const extractRuleHints = (patterns: unknown, isKeyword: boolean): string[] => {
2427
if (lower.length >= HINT_MIN_LEN) candidates.push(lower)
2528
continue
2629
}
27-
let longest = ''
28-
for (const segment of text.split(REGEX_LITERAL_SPLIT)) {
29-
const lower = segment.toLowerCase()
30-
if (lower.length > longest.length) longest = lower
30+
for (const segment of text.replace(REGEX_CONTROL_ESCAPE, ' ').split(REGEX_LITERAL_SPLIT)) {
31+
const lower = normalizeHintCandidate(segment)
32+
if (lower.length >= HINT_MIN_LEN) candidates.push(lower)
3133
}
32-
if (longest.length >= HINT_MIN_LEN) candidates.push(longest)
3334
}
3435
return [...new Set(candidates)].sort((a, b) => b.length - a.length).slice(0, HINT_MAX_COUNT)
3536
}
@@ -54,7 +55,7 @@ const precompileRuleTree = (node: any): void => {
5455
if (typeof node !== 'object') return
5556
if (isLeafRule(node)) {
5657
const isKeyword = node.matchType === 'keyword'
57-
const hints = extractRuleHints(node.patterns, isKeyword)
58+
const hints = Array.isArray(node.__hints) && node.__hints.length ? node.__hints : extractRuleHints(node.patterns, isKeyword)
5859
if (hints.length) node.__hints = hints
5960
if (isKeyword) {
6061
const combined = buildKeywordCombinedSource(node.patterns)

0 commit comments

Comments
 (0)