Skip to content

Commit 502e647

Browse files
committed
feat: 加多路径版本号识别和图标兜底链路
支持 versionPattern、versionFrom、versionFromAttribute、minPatternMatches 等规则字段、TechChip 加官网 favicon 与 HTML link 标签兜底、Reka UI 与 shadcn-vue 与 Open Graph 与 Priority Hints 与 alt-svc HTTP/3 与 AWS 总品牌等规则,收紧 Teleport 与 Reka UI 误识别,修 popup-cache 三处 version 字段丢失、Server 头首段污染、semver 字典序排序坑。 将版本号提升到 1.3.70。
1 parent b832893 commit 502e647

26 files changed

Lines changed: 1072 additions & 88 deletions

build-scripts/extract-wappalyzer-icons.mjs

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,25 @@ const MANIFEST_PATH = path.join(repoRoot, 'src', 'ui', 'components', 'skills-ind
2121
// service worker / page-detector 在运行时硬编码塞到识别结果里、不在 rules JSON 中出现的技术名
2222
const EXTRA_NAMES = ['HTTP/2', 'HTTP/3', 'HTTPS']
2323

24+
// Hand-written aliases: rule name → basename (without extension) of the Wappalyzer icon file.
25+
// NOTE(review): this was described as the last-resort fallback, but matchWappalyzerSlug looks the name up here first, before automatic slug matching and the first-word fallback.
26+
// Keys are tech.name exactly as written in our rules (case-sensitive); values are file names under Wappalyzer's images/icons/.
27+
const ICON_NAME_ALIASES = {
28+
'Hotwire Turbo': 'Turbo',
29+
AWS: 'Amazon Web Services',
30+
'AWS CloudFront': 'Amazon Cloudfront',
31+
'AWS S3 / Static Hosting': 'Amazon S3',
32+
'shadcn-vue': 'shadcn-ui',
33+
'Tencent EdgeOne': 'EdgeOne'
34+
}
35+
36+
// First-word brand aliases: map the abbreviated brand prefixes our rules use onto Wappalyzer's naming style,
37+
// so the first-word fallback can still hit (AWS Bedrock / AWS Amplify / AWS Textract etc. all use the main Amazon Web Services logo).
38+
const BRAND_FIRST_WORD_ALIASES = {
39+
AWS: 'Amazon Web Services',
40+
Alibaba: 'Alibaba Cloud'
41+
}
42+
2443
if (!fs.existsSync(ICON_DIR)) {
2544
console.error(`找不到 Wappalyzer 图标目录:${ICON_DIR}`)
2645
console.error('请设置环境变量 WAPPALYZER_ICON_DIR 指向本地安装的 images/icons 目录')
@@ -87,14 +106,22 @@ fs.mkdirSync(OUTPUT_DIR, { recursive: true })
87106

88107
// Given a rule name, return the matching Wappalyzer slug (it may differ from localKey, e.g. cloudflarewebanalytics → cloudflare),
// or null when neither the alias, the full name nor the first word has an icon in iconBySlug.
89108
const matchWappalyzerSlug = name => {
109+
// 0. Hand-written alias comes first: covers concatenated / compound names such as Hotwire Turbo → Turbo
110+
const aliased = ICON_NAME_ALIASES[name]
111+
if (aliased) {
112+
const aliasSlug = normalize(aliased)
113+
if (aliasSlug && iconBySlug.has(aliasSlug)) return aliasSlug
114+
}
90115
const base = primaryName(name)
91116
const fullSlug = normalize(base)
92117
if (iconBySlug.has(fullSlug)) return fullSlug
93118
// First-word fallback: "Cloudflare Web Analytics" → "Cloudflare" → cloudflare;
94119
// "Microsoft Teams" → "Microsoft". Uses the brand's main logo, trading a little accuracy for coverage
95120
const firstWord = base.split(/\s+/)[0]
96121
if (!firstWord) return null
97-
const firstSlug = normalize(firstWord)
122+
// Apply the brand alias first (AWS → Amazon Web Services), then do the slug lookup
123+
const brandAliased = BRAND_FIRST_WORD_ALIASES[firstWord] || firstWord
124+
const firstSlug = normalize(brandAliased)
98125
if (firstSlug && firstSlug !== fullSlug && iconBySlug.has(firstSlug)) return firstSlug
99126
return null
100127
}
@@ -142,8 +169,8 @@ for (const name of ruleNames) {
142169
// 自定义图标比 Wappalyzer 优先
143170
if (fs.existsSync(CUSTOM_DIR)) {
144171
for (const f of fs.readdirSync(CUSTOM_DIR)) {
145-
if (!f.endsWith('.svg') && !f.endsWith('.png')) continue
146-
const slug = normalize(f.replace(/\.(svg|png)$/i, ''))
172+
if (!/\.(svg|png|ico)$/i.test(f)) continue
173+
const slug = normalize(f.replace(/\.(svg|png|ico)$/i, ''))
147174
if (!slug) continue
148175
const ext = path.extname(f).toLowerCase().slice(1)
149176
const filename = slug + '.' + ext
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
#!/usr/bin/env node
2+
// 给 tech-links.json 里"有官网但 Wappalyzer 没收图标"的 tech 自动抓官网 favicon,
3+
// 保存到 build-scripts/custom-icons/<slug>.{svg,png,ico},随后被 extract-wappalyzer-icons.mjs 复制到 public/skills/
4+
//
5+
// 工作流:
6+
// 1. 读 tech-links.json 拿所有 tech 名 → 官网 URL 映射
7+
// 2. 跳过 skills-index 已经有图标的 slug
8+
// 3. 跳过黑名单域名(github/npm/wordpress.org/drupal.org 等"非品牌官网"位置)
9+
// 4. 抓 HTML head 找最大 favicon:SVG → apple-touch-icon → 32+ PNG → /favicon.ico
10+
// 5. 下载保存
11+
//
12+
// 用法:
13+
// node build-scripts/fetch-tech-favicons.mjs # 全量
14+
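// node build-scripts/fetch-tech-favicons.mjs --limit=50 # 只跑前 50 个
// node build-scripts/fetch-tech-favicons.mjs --force # also re-fetch techs whose slug already has an icon
// node build-scripts/fetch-tech-favicons.mjs --only=<name> # only process the tech whose name matches exactly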
15+
16+
import fs from 'node:fs'
17+
import path from 'node:path'
18+
import { fileURLToPath } from 'node:url'
19+
20+
// Directory of this script, derived from import.meta.url
const __dirname = path.dirname(fileURLToPath(import.meta.url))
21+
const repoRoot = path.resolve(__dirname, '..')
22+
// Input: JSON file whose `links` field is read as the tech name → website URL map
const TECH_LINKS_PATH = path.join(repoRoot, 'public', 'tech-links.json')
23+
// Input: JSON file whose `skillsIndex` field lists slugs that already have an icon
const MANIFEST_PATH = path.join(repoRoot, 'src', 'ui', 'components', 'skills-index.json')
24+
// Output: downloaded icons are written here as <slug>.<ext>
const OUTPUT_DIR = path.join(__dirname, 'custom-icons')
25+
26+
// Number of techs fetched in parallel per batch
const CONCURRENCY = 8
27+
// Per-request abort timeout in milliseconds
const TIMEOUT_MS = 6000
28+
// User-Agent header sent with every request
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36'
29+
30+
// CLI flags: --limit=N caps how many queued techs are processed, --force also re-fetches slugs that
// already have an icon, --only=<name> restricts the run to the tech with exactly that name.
const args = process.argv.slice(2)
// Segment between the first and second '=' of the first argument starting with `prefix`;
// undefined when no such argument exists.
const optionValue = prefix => {
  const hit = args.find(arg => arg.startsWith(prefix))
  return hit?.split('=')[1]
}
const limit = Number(optionValue('--limit=') || 0)
const force = args.includes('--force')
const onlyName = optionValue('--only=') || ''
34+
35+
// Hosts that are aggregation points rather than a brand's own website (code hosting, package registries,
// CDNs, specs and vendor docs). Even when tech-links points at one of them, its favicon must not be used as the brand logo.
36+
const DOMAIN_BLOCKLIST = new Set([
37+
'github.com',
38+
'gitlab.com',
39+
'bitbucket.org',
40+
'codeberg.org',
41+
'sourceforge.net',
42+
'npmjs.com',
43+
'www.npmjs.com',
44+
'wordpress.org',
45+
'drupal.org',
46+
'packagist.org',
47+
'cdnjs.com',
48+
'unpkg.com',
49+
'jsdelivr.com',
50+
'cdn.jsdelivr.net',
51+
'yarnpkg.com',
52+
'pypi.org',
53+
'mvnrepository.com',
54+
'nuget.org',
55+
'rubygems.org',
56+
'crates.io',
57+
'hex.pm',
58+
'pkg.go.dev',
59+
'pub.dev',
60+
'developer.mozilla.org',
61+
'w3.org',
62+
'www.w3.org',
63+
'spec.whatwg.org',
64+
'tc39.es',
65+
'caniuse.com',
66+
'web.dev',
67+
'developers.google.com',
68+
'docs.microsoft.com',
69+
'learn.microsoft.com',
70+
'docs.aws.amazon.com'
71+
])
72+
73+
// Some rule names are not brand names (protocols / specifications); a website favicon is meaningless for them,
// so entries with these exact names are never queued.
74+
const NAME_SKIP = new Set([
75+
'HTTPS',
76+
'HTTP/2',
77+
'HTTP/3',
78+
'JavaScript',
79+
'WebSocket',
80+
'WebAssembly',
81+
'ES Modules',
82+
'PWA Manifest',
83+
'Service Worker'
84+
])
85+
86+
const normalize = raw =>
87+
String(raw || '')
88+
.toLowerCase()
89+
.replace(/\./g, 'dot')
90+
.replace(/\+/g, 'plus')
91+
.replace(/&/g, 'and')
92+
.replace(/[^a-z0-9-]/g, '')
93+
94+
// Primary part of a rule name: the trimmed text before the first '/' ("AWS S3 / Static Hosting" → "AWS S3").
// When that part is empty (e.g. the name starts with '/'), the whole trimmed name is returned instead.
const primaryName = name => {
  const text = String(name || '')
  const [head] = text.split(/\s*\/\s*/)
  return head.trim() || text.trim()
}
98+
99+
// Slug of a rule name: normalize() applied to its primaryName() part.
const slugFromName = name => {
  const head = primaryName(name)
  return normalize(head)
}
100+
101+
// Promise that resolves (with undefined) after `ms` milliseconds.
const sleep = ms =>
  new Promise(resolve => {
    setTimeout(resolve, ms)
  })
102+
103+
/**
 * fetch() with a hard deadline of TIMEOUT_MS, following redirects and sending the script's
 * User-Agent plus `Accept: *​/*` (headers in `init.headers` override these defaults).
 *
 * The deadline comes from AbortSignal.timeout(), so it stays armed after the response headers
 * arrive. The previous AbortController + clearTimeout-in-finally version disarmed the timer as
 * soon as fetch() resolved, leaving the caller's `res.text()` / `res.arrayBuffer()` with no
 * timeout at all: a server that stalls mid-body could hang a whole batch.
 *
 * @param {string | URL} url - resource to fetch
 * @param {RequestInit} [init] - extra fetch options; `signal` and `redirect` are always overridden
 * @returns {Promise<Response>} rejects with a DOMException named "TimeoutError" once the deadline passes
 */
const fetchWithTimeout = async (url, init = {}) =>
  fetch(url, {
    ...init,
    signal: AbortSignal.timeout(TIMEOUT_MS),
    redirect: 'follow',
    headers: { 'User-Agent': USER_AGENT, Accept: '*/*', ...(init.headers || {}) }
  })
119+
120+
/**
 * Turn a <link sizes="..."> attribute into a single ranking number.
 *
 * `sizes` is a space-separated list ("16x16 32x32 180x180"), so every WxH entry is inspected and
 * the largest one wins; previously only the first entry was read, which under-scored multi-size icons.
 * Each entry counts as min(width, height). Values without any WxH entry (e.g. "any") and empty or
 * missing input give 0.
 *
 * @param {string | null | undefined} sizes - raw attribute value
 * @returns {number} largest min(width, height) found, or 0
 */
const parseSizes = sizes => {
  if (!sizes) return 0
  let best = 0
  for (const [, width, height] of String(sizes).matchAll(/(\d+)x(\d+)/gi)) {
    best = Math.max(best, Math.min(Number(width), Number(height)))
  }
  return best
}
125+
126+
// Collect the icon candidates declared by <link> tags in `html`, best-scored first.
// Each candidate is { url, score, type }: `url` is the href resolved against baseUrl and `type` is the raw
// type attribute. Tags with no quoted href, no "icon" in rel, or an href that cannot be resolved are skipped.
const extractFaviconCandidates = (html, baseUrl) => {
  // Value of a quoted attribute inside a tag, or '' when the attribute is absent
  const attr = (tag, attrName) => new RegExp(`\\b${attrName}\\s*=\\s*["']([^"']+)["']`, 'i').exec(tag)?.[1] || ''

  const found = []
  for (const [tag] of String(html).matchAll(/<link\b[^>]+>/gi)) {
    const href = attr(tag, 'href')
    if (!href) continue
    const rel = attr(tag, 'rel').toLowerCase()
    // Every accepted rel value (icon, apple-touch-icon, mask-icon) contains "icon"
    if (!rel.includes('icon')) continue
    let resolved
    try {
      resolved = new URL(href, baseUrl).toString()
    } catch {
      continue
    }
    const type = attr(tag, 'type')
    const looksSvg = /\.svg(?:$|[?#])/i.test(resolved) || /svg/i.test(type)
    const isAppleTouch = rel.includes('apple-touch-icon')
    // Ranking: SVG (+1000) > apple-touch-icon (+300) > declared size > plain icon
    const score = (looksSvg ? 1000 : 0) + (isAppleTouch ? 300 : 0) + parseSizes(attr(tag, 'sizes'))
    found.push({ url: resolved, score, type })
  }
  return found.sort((a, b) => b.score - a.score)
}
158+
159+
/**
 * Decide which file extension a downloaded icon should be saved under.
 *
 * Order of precedence:
 *  1. A document Content-Type (text/html, application/xhtml+xml, application/json) is rejected
 *     outright. Sites commonly answer 200 with an HTML page for a missing /apple-touch-icon.png or
 *     /favicon.ico, and the old suffix-first check saved that page as an image.
 *  2. A declared image/* type describes the actual bytes, so it wins over the URL suffix.
 *  3. Otherwise fall back to the URL suffix, then a loose match on the header.
 *
 * @param {string} url - URL the bytes were fetched from
 * @param {string | null | undefined} contentType - raw Content-Type header value
 * @returns {'svg' | 'png' | 'ico' | ''} extension without the dot; '' when the response is not a usable icon
 */
const extFromUrlAndType = (url, contentType) => {
  // Bare MIME type without parameters such as "; charset=utf-8"
  const mime = String(contentType || '').split(';')[0].trim().toLowerCase()
  if (/^(?:text\/html|application\/xhtml\+xml|application\/json)$/.test(mime)) return ''
  if (mime.startsWith('image/')) {
    if (mime.includes('svg')) return 'svg'
    if (mime.includes('png')) return 'png'
    if (mime.includes('icon')) return 'ico' // image/x-icon, image/vnd.microsoft.icon
    if (/jpe?g/.test(mime)) return 'png' // JPEG is stored under .png; browsers sniff the real format for <img>
  }
  if (/\.svg(?:$|[?#])/i.test(url) || /svg/i.test(contentType)) return 'svg'
  if (/\.png(?:$|[?#])/i.test(url) || /png/i.test(contentType)) return 'png'
  if (/\.ico(?:$|[?#])/i.test(url) || /x-icon|icon/i.test(contentType)) return 'ico'
  if (/\.jpe?g(?:$|[?#])/i.test(url) || /jpe?g/i.test(contentType)) return 'png' // same JPEG-as-.png convention
  return ''
}
166+
167+
/**
 * Download the best available icon for a tech's website.
 *
 * Steps:
 *  1. Fetch the site's origin and collect <link rel="...icon..."> candidates from the HTML.
 *     This is best effort: if the page cannot be fetched, only the well-known paths are tried.
 *  2. Append the well-known fallbacks /apple-touch-icon.png, /favicon.svg and /favicon.ico.
 *  3. Try candidates in order and return the first whose body is 64 B – 512 KiB and whose
 *     extension can be determined.
 *
 * Changes from the previous version: relative hrefs (and the fallbacks) are resolved against the
 * final post-redirect URL of the page instead of the original origin, so a site that redirects
 * (e.g. to a www host or a locale path) yields correct icon URLs; and a URL that appears more
 * than once in the candidate list is fetched only once.
 *
 * @param {string} techName - rule name; not used by the lookup itself, kept for the call signature
 * @param {string} websiteUrl - the tech's website URL; only its origin is requested
 * @returns {Promise<{ok: true, ext: string, buf: Buffer, sourceUrl: string, score: number} |
 *   {ok: false, reason: string, tried?: string[]}>} never rejects for network errors
 */
const fetchIcon = async (techName, websiteUrl) => {
  let origin
  try {
    origin = new URL(websiteUrl).origin
  } catch {
    return { ok: false, reason: 'invalid-url' }
  }

  let html = ''
  let baseUrl = origin
  try {
    const res = await fetchWithTimeout(origin, {})
    if (res.ok) {
      html = await res.text()
      // Response.url is the final URL after redirects; relative hrefs belong to that document
      if (res.url) baseUrl = res.url
    }
  } catch {
    // Deliberately ignored: the well-known fallback paths below are still tried
  }

  const candidates = extractFaviconCandidates(html, baseUrl)
  const wellKnown = [
    ['/apple-touch-icon.png', 200],
    ['/favicon.svg', 100],
    ['/favicon.ico', 0]
  ]
  for (const [pathname, score] of wellKnown) {
    candidates.push({ url: new URL(pathname, baseUrl).toString(), score, type: '' })
  }

  const attempted = new Set()
  const tried = []
  for (const cand of candidates) {
    // The HTML often already declares /favicon.ico etc.; do not request the same URL twice
    if (attempted.has(cand.url)) continue
    attempted.add(cand.url)
    try {
      const res = await fetchWithTimeout(cand.url, {})
      if (!res.ok) {
        tried.push(`${cand.url}=${res.status}`)
        continue
      }
      const buf = Buffer.from(await res.arrayBuffer())
      const contentType = res.headers.get('content-type') || ''
      if (buf.length < 64) {
        tried.push(`${cand.url}=tooSmall(${buf.length})`)
        continue
      }
      if (buf.length > 512 * 1024) {
        tried.push(`${cand.url}=tooLarge(${buf.length})`)
        continue
      }
      const ext = extFromUrlAndType(cand.url, contentType)
      if (!ext) {
        tried.push(`${cand.url}=unknownExt(ct=${contentType})`)
        continue
      }
      return { ok: true, ext, buf, sourceUrl: cand.url, score: cand.score }
    } catch (e) {
      tried.push(`${cand.url}=err:${e?.name || 'unknown'}`)
    }
  }
  return { ok: false, reason: 'no-icon-found', tried }
}
219+
220+
// ---------------- 主流程 ----------------
221+
222+
// Load the tech name → website URL map (`links` field) and the existing icon manifest (`skillsIndex` field);
// a missing field falls back to an empty object.
const techLinks = JSON.parse(fs.readFileSync(TECH_LINKS_PATH, 'utf8')).links || {}
223+
const skillsIndex = JSON.parse(fs.readFileSync(MANIFEST_PATH, 'utf8')).skillsIndex || {}
224+
225+
// Make sure the output directory exists, then collect the normalized slugs of the icon files already in it
fs.mkdirSync(OUTPUT_DIR, { recursive: true })
226+
const existingCustom = new Set(fs.readdirSync(OUTPUT_DIR).map(f => normalize(f.replace(/\.(svg|png|ico)$/i, ''))))
227+
228+
// Build the work queue: one { name, url, slug } entry per tech that still needs an icon.
// An entry is skipped when its URL is missing or unparsable, its name is in NAME_SKIP, it does not
// match --only, its slug is empty, it already has an icon (unless --force), or its website is on
// a blocklisted host.
const queue = []
for (const [name, url] of Object.entries(techLinks)) {
  if (!url || typeof url !== 'string') continue
  if (NAME_SKIP.has(name)) continue
  if (onlyName && name !== onlyName) continue
  const slug = slugFromName(name)
  if (!slug) continue
  if (!force && skillsIndex[slug]) continue // already covered by a Wappalyzer icon or an earlier fetch
  if (!force && existingCustom.has(slug)) continue
  let host
  try {
    // hostname rather than host, so an explicit port (example.com:443) cannot defeat the blocklist
    host = new URL(url).hostname.toLowerCase()
  } catch {
    continue
  }
  // Also match with a leading "www." removed: links such as https://www.nuget.org/... or
  // https://www.drupal.org/... must hit the bare-domain blocklist entries
  if (DOMAIN_BLOCKLIST.has(host) || DOMAIN_BLOCKLIST.has(host.replace(/^www\./, ''))) continue
  queue.push({ name, url, slug })
}
246+
247+
// Report the plan, then apply --limit: a positive limit keeps only the first `limit` queued items.
const totalTechCount = Object.keys(techLinks).length
const coveredCount = Object.keys(skillsIndex).length
console.log(`[plan] 候选 tech 数:${queue.length}(总 tech: ${totalTechCount},Wappalyzer 已覆盖: ${coveredCount})`)
const hasLimit = limit > 0
const todo = hasLimit ? queue.slice(0, limit) : queue
const scopeLabel = hasLimit ? '--limit=' + limit : '全量'
console.log(`[plan] 本次抓:${todo.length} 个(${scopeLabel})`)

// Running tallies and failure details, updated by runBatch
let okCount = 0
let failCount = 0
const fails = []
256+
257+
// Process one batch of queue items in parallel. For each item the icon is fetched and, on success,
// written to OUTPUT_DIR as <slug>.<ext>; failures are recorded in `fails`. okCount / failCount are
// updated accordingly. Resolves to the array of per-item results (each undefined).
const runBatch = async batch => {
  const saveOne = async ({ name, slug, url }) => {
    const outcome = await fetchIcon(name, url)
    if (outcome.ok) {
      const filename = slug + '.' + outcome.ext
      fs.writeFileSync(path.join(OUTPUT_DIR, filename), outcome.buf)
      okCount++
      console.log(` ✓ ${name.padEnd(40)} ${outcome.ext.padEnd(4)} ${outcome.buf.length}B ${outcome.sourceUrl.slice(0, 80)}`)
      return
    }
    failCount++
    fails.push({ name, slug, reason: outcome.reason, tried: outcome.tried })
  }
  return Promise.all(batch.map(item => saveOne(item)))
}
274+
275+
// Entry point: process `todo` in batches of CONCURRENCY, then print a summary.
// Fixes over the previous version:
//  - failure details were printed only when there were at most 30 failures, so a run with many
//    failures printed none, although the heading and slice(0, 30) intend "the first 30";
//  - a rejection inside the async IIFE (e.g. a failed file write) was left unhandled; it is now
//    logged and turned into a non-zero exit code.
;(async () => {
  for (let i = 0; i < todo.length; i += CONCURRENCY) {
    const batch = todo.slice(i, i + CONCURRENCY)
    await runBatch(batch)
    if (i % 40 === 0) await sleep(200) // throttle: short pause so target sites are not hammered
  }
  console.log(`\n[done] ok=${okCount} fail=${failCount}`)
  if (fails.length) {
    console.log('\n失败明细(前 30):')
    fails.slice(0, 30).forEach(f => {
      console.log(` · ${f.name.padEnd(40)} ${f.reason}`)
      if (f.tried) f.tried.forEach(t => console.log(` ${t}`))
    })
  }
  console.log(`\n下一步:跑 \`node build-scripts/extract-wappalyzer-icons.mjs\` 让 custom-icons/ 复制到 public/skills/ 并更新 manifest`)
})().catch(err => {
  console.error(err)
  process.exitCode = 1
})

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
"name": "stackprism",
33
"private": true,
4-
"version": "1.3.69",
4+
"version": "1.3.70",
55
"type": "module",
66
"description": "StackPrism 用于检测网页前端、后端、CDN、SaaS、广告营销、统计、登录、支付、网站程序和主题模板线索。",
77
"scripts": {
8-
"typecheck": "vue-tsc --noEmit",
8+
"typecheck": "vue-tsc --noEmit && pnpm build",
99
"lint": "eslint src",
1010
"build:injected": "node build-scripts/build-injected.mjs",
1111
"build": "pnpm run build:injected && vite build",

public/rules/headers/header-patterns.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7460,6 +7460,18 @@
74607460
"name": "Tggl",
74617461
"patterns": ["url: https?://api\\.tggl\\.io/", "url: https?://cdn\\.tggl\\.io/"],
74627462
"evidence": "响应 URL 指向 Tggl"
7463+
},
7464+
{
7465+
"name": "AWS",
7466+
"__hints": ["cloudfront", "x-amz-cf-", "x-amzn-", "amazons3", "awselb"],
7467+
"patterns": [
7468+
"(?:^|\\n)x-amz-(?:cf-(?:id|pop)|request-id|id-2|bucket-region|trace-id|server-side-encryption|version-id|delete-marker):",
7469+
"(?:^|\\n)x-amzn-(?:requestid|trace-id|errortype|remapped-x-amzn-requestid|cors-allowed-origin):",
7470+
"via:.*\\bcloudfront\\b",
7471+
"x-cache:.*\\bcloudfront\\b",
7472+
"server:\\s*(?:awselb|amazons3|amazon)\\b"
7473+
],
7474+
"evidence": "响应头包含 AWS 服务特征(CloudFront / S3 / API Gateway / ELB 等)"
74637475
}
74647476
]
74657477
},
@@ -9065,6 +9077,12 @@
90659077
"name": "WebSocket",
90669078
"patterns": ["url:\\s*wss?://|(?:^|\\n)upgrade:\\s*websocket|(?:^|\\n)sec-websocket-accept:"],
90679079
"evidence": "WebSocket 握手或 ws/wss 请求 URL"
9080+
},
9081+
{
9082+
"name": "HTTP/3",
9083+
"confidence": "",
9084+
"patterns": ["(?:^|\\n)alt-svc:[^\\n]*\\bh3(?:-\\d+)?="],
9085+
"evidence": "alt-svc 响应头通告 HTTP/3 支持"
90689086
}
90699087
]
90709088
},

0 commit comments

Comments
 (0)