Skip to content

Commit d5a087f

Browse files
committed
feat: 扩本地图标覆盖到 2503 个、加首词兜底和 HTTPS 自定义
抽取脚本 slugify 改用 ` / `(带空格)拆别名,保留 HTTP/2、HTTP/3 这类纯版本号写法、命中 Wappalyzer 的 HTTP2.svg / HTTP3.svg、加 EXTRA_NAMES 把 page-detector 硬编码的 HTTP/2、HTTP/3、HTTPS 三个协议名也纳入扫描;命中失败再做首词兜底(Cloudflare Web Analytics → cloudflare、Spring Boot → spring、Azure CDN → azure),用品牌主 logo 替代文字色块、覆盖率从 1333 涨到 2503;manifest 改成 slug → filename 形态、多个别名共用一个物理文件、12MB 落盘控制 1660 个图标;新增 build-scripts/custom-icons/ 目录、补一个 HTTPS 锁形 SVG 兜底 Wappalyzer 缺失;将版本号提升到 1.3.68。
1 parent 82498ad commit d5a087f

340 files changed

Lines changed: 5677 additions & 55 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 3 additions & 0 deletions
Loading

build-scripts/extract-wappalyzer-icons.mjs

Lines changed: 81 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ const DEFAULT_DIR =
1414
// Directory scanned for Wappalyzer icon files; a non-empty WAPPALYZER_ICON_DIR environment variable takes precedence over DEFAULT_DIR.
const ICON_DIR = process.env.WAPPALYZER_ICON_DIR || DEFAULT_DIR
1515
// Rule files under public/rules are walked to collect technology names.
const RULES_DIR = path.join(repoRoot, 'public', 'rules')
1616
// Destination for the icon files this script copies.
const OUTPUT_DIR = path.join(repoRoot, 'public', 'icons', 'tech')
17+
// custom-icons directory resolved against __dirname; its svg/png files are copied into OUTPUT_DIR after the Wappalyzer pass.
const CUSTOM_DIR = path.join(__dirname, 'custom-icons')
18+
19+
// Technology names that the service worker / page-detector hard-code into detection results at runtime and that never appear in the rules JSON
20+
const EXTRA_NAMES = ['HTTP/2', 'HTTP/3', 'HTTPS']
1721

1822
if (!fs.existsSync(ICON_DIR)) {
1923
console.error(`找不到 Wappalyzer 图标目录:${ICON_DIR}`)
@@ -45,7 +49,13 @@ const slugify = raw =>
4549
.toLowerCase()
4650
.replace(/[^a-z0-9]/g, '')
4751

48-
// 收 Wappalyzer 图标库:filename → [候选文件名(可能多种扩展名)]
52+
// Keep in sync with toSlug in TechChip: aliases are split on ` / ` (with the surrounding spaces), so bare version spellings such as HTTP/2 stay intact.
// Returns the text before the first ` / ` separator, trimmed; any falsy input (null, undefined, '') yields ''.
53+
const primaryName = raw =>
54+
String(raw || '')
55+
.split(' / ')[0]
56+
.trim()
57+
58+
// 收 Wappalyzer 图标库:slug → [候选文件名(可能多种扩展名)]
4959
const iconBySlug = new Map()
5060
for (const f of fs.readdirSync(ICON_DIR)) {
5161
if (!f.endsWith('.svg') && !f.endsWith('.png')) continue
@@ -55,8 +65,8 @@ for (const f of fs.readdirSync(ICON_DIR)) {
5565
iconBySlug.get(slug).push(f)
5666
}
5767

58-
// 收我们 rules 里所有 name
59-
const ruleNames = new Set()
68+
// 收我们 rules 里所有 name + EXTRA_NAMES
69+
const ruleNames = new Set(EXTRA_NAMES)
6070
for (const f of walk(RULES_DIR)) {
6171
try {
6272
collectNames(JSON.parse(fs.readFileSync(f, 'utf8')), ruleNames)
@@ -69,30 +79,75 @@ for (const f of walk(RULES_DIR)) {
6979
// Start from an empty output directory: remove any existing OUTPUT_DIR tree, then recreate it, so files from an earlier run do not linger.
if (fs.existsSync(OUTPUT_DIR)) fs.rmSync(OUTPUT_DIR, { recursive: true, force: true })
7080
fs.mkdirSync(OUTPUT_DIR, { recursive: true })
7181

72-
let copied = 0
82+
// Given a technology name, return the Wappalyzer slug whose icon should be used (it may differ from the rule's own localKey, e.g. cloudflarewebanalytics → cloudflare).
// Returns null when neither the full name nor its first word has an entry in iconBySlug.
83+
const matchWappalyzerSlug = name => {
84+
const base = primaryName(name)
85+
const fullSlug = slugify(base)
86+
// Exact match on the whole (alias-stripped) name wins.
if (iconBySlug.has(fullSlug)) return fullSlug
87+
// First-word fallback: "Cloudflare Web Analytics" → "Cloudflare" → cloudflare;
88+
// "Microsoft Teams" → "Microsoft". Uses the brand's main logo, trading a little accuracy for coverage
89+
const firstWord = base.split(/\s+/)[0]
90+
if (!firstWord) return null
91+
const firstSlug = slugify(firstWord)
92+
// firstSlug === fullSlug means the name is a single word whose lookup already failed above, so it is not retried.
if (firstSlug && firstSlug !== fullSlug && iconBySlug.has(firstSlug)) return firstSlug
93+
return null
94+
}
95+
96+
// Number of manifest keys added (by the rule-name loop and by custom icons that introduce a new key).
let mappedKeys = 0
97+
// Number of keys whose matched Wappalyzer slug differs from the rule's own slug (first-word fallback hits).
let prefixMatched = 0
7398
let svgCount = 0
7499
let pngCount = 0
100+
// Number of files copied from CUSTOM_DIR.
let customCount = 0
75101
// Sum of the sizes of every copied file, in bytes.
let totalBytes = 0
102+
// manifest: localKey (slug of the rule name) → actual file name on disk (several keys may share one file, which saves space)
76103
const manifest = {}
77-
const seenSlugs = new Set()
104+
// Wappalyzer slug → file name already written to disk, to avoid writing the same icon twice
105+
const writtenFiles = new Map()
106+
const seenLocalKeys = new Set()
107+
78108
// Map every collected rule name to an icon file. In this diff view the lines tagged "NN-" are the removed pre-change body
// and the lines tagged "NN+" are its replacement.
for (const name of ruleNames) {
79-
// Same algorithm as slugify in our TechChip: take the part before "/", lowercase, and drop every non-alphanumeric character
80-
const slug = slugify(name.split('/')[0])
81-
if (!slug || seenSlugs.has(slug)) continue
82-
const candidates = iconBySlug.get(slug)
83-
if (!candidates) continue
84-
85-
// Prefer svg
86-
const file = candidates.find(c => c.endsWith('.svg')) || candidates[0]
87-
const ext = path.extname(file).toLowerCase().slice(1)
88-
const dst = path.join(OUTPUT_DIR, slug + '.' + ext)
89-
fs.copyFileSync(path.join(ICON_DIR, file), dst)
90-
seenSlugs.add(slug)
91-
manifest[slug] = ext
92-
copied++
93-
totalBytes += fs.statSync(dst).size
94-
if (ext === 'svg') svgCount++
95-
else pngCount++
109+
// localKey is the manifest key: the slug of the alias-stripped rule name.
const localKey = slugify(primaryName(name))
110+
if (!localKey || seenLocalKeys.has(localKey)) continue
111+
// NOTE(review): a localKey is only recorded after a successful match, so two names that share a localKey
// but differ in their first word (e.g. "Foo Bar" vs "FooBar") resolve according to iteration order — confirm this is acceptable.
const matchedSlug = matchWappalyzerSlug(name)
112+
if (!matchedSlug) continue
113+
114+
let filename = writtenFiles.get(matchedSlug)
115+
if (!filename) {
116+
// First time this Wappalyzer slug is seen: write one copy to disk, preferring svg
117+
const candidates = iconBySlug.get(matchedSlug)
118+
const file = candidates.find(c => c.endsWith('.svg')) || candidates[0]
119+
const ext = path.extname(file).toLowerCase().slice(1)
120+
// The output file is named after the Wappalyzer slug, not the localKey, so every key that maps to it shares the file.
filename = matchedSlug + '.' + ext
121+
const dst = path.join(OUTPUT_DIR, filename)
122+
fs.copyFileSync(path.join(ICON_DIR, file), dst)
123+
writtenFiles.set(matchedSlug, filename)
124+
totalBytes += fs.statSync(dst).size
125+
if (ext === 'svg') svgCount++
126+
else pngCount++
127+
}
128+
129+
seenLocalKeys.add(localKey)
130+
manifest[localKey] = filename
131+
mappedKeys++
132+
// A differing slug means the icon came from the first-word fallback.
if (matchedSlug !== localKey) prefixMatched++
133+
}
134+
135+
// Custom icons: cover what Wappalyzer lacks (the HTTPS lock, anything added later).
136+
// Custom icons take precedence over Wappalyzer: a manifest entry written here replaces any entry set by the loop above.
137+
if (fs.existsSync(CUSTOM_DIR)) {
138+
for (const f of fs.readdirSync(CUSTOM_DIR)) {
139+
// NOTE(review): this extension check is case-sensitive while the regex on the next statement uses /i,
// so a file named e.g. "X.SVG" is skipped here — confirm whether that is intended.
if (!f.endsWith('.svg') && !f.endsWith('.png')) continue
140+
const slug = slugify(f.replace(/\.(svg|png)$/i, ''))
141+
if (!slug) continue
142+
const ext = path.extname(f).toLowerCase().slice(1)
143+
const filename = slug + '.' + ext
144+
const dst = path.join(OUTPUT_DIR, filename)
145+
// NOTE(review): if the Wappalyzer pass already wrote a file for this slug, a same-named file is overwritten here,
// while a file with the other extension stays on disk unreferenced by this key; svgCount/pngCount are not adjusted either way.
fs.copyFileSync(path.join(CUSTOM_DIR, f), dst)
146+
// Count the key only when it is new; an existing key is overwritten without changing the mapping count.
if (!manifest[slug]) mappedKeys++
147+
manifest[slug] = filename
148+
customCount++
149+
totalBytes += fs.statSync(dst).size
150+
}
96151
}
97152

98153
// 写出 manifest,运行时 TechChip 用它判断本地是否有图标、是 svg 还是 png,避免无意义 404
@@ -101,7 +156,10 @@ for (const slug of Object.keys(manifest).sort()) sortedManifest[slug] = manifest
101156
// Serialize the key-sorted manifest as single-line JSON (plus a trailing newline) into the UI components directory.
const manifestPath = path.join(repoRoot, 'src', 'ui', 'components', 'local-icon-manifest.json')
102157
fs.writeFileSync(manifestPath, JSON.stringify(sortedManifest) + '\n', 'utf8')
103158

104-
console.log(`抽取完成:${copied} 个图标 (svg: ${svgCount}, png: ${pngCount})`)
159+
// Summary line: mapping count, physical file count, and the svg / png / custom / first-word-fallback breakdown.
// NOTE(review): the physical-file figure is writtenFiles.size + customCount, so a custom icon that overwrote a
// same-named Wappalyzer file is counted twice (and its bytes are added to totalBytes on top of the replaced file's).
console.log(
160+
`抽取完成:${mappedKeys} 个映射 → ${writtenFiles.size + customCount} 个物理文件 ` +
161+
`(svg: ${svgCount}, png: ${pngCount}, custom: ${customCount}, 首词兜底: ${prefixMatched})`
162+
)
105163
console.log(`输出目录:${OUTPUT_DIR}`)
106164
// totalBytes is reported in MiB with two decimals.
console.log(`总大小:${(totalBytes / 1024 / 1024).toFixed(2)} MB`)
107165
console.log(`manifest:${manifestPath}`)

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "stackprism",
33
"private": true,
4-
"version": "1.3.67",
4+
"version": "1.3.68",
55
"type": "module",
66
"description": "StackPrism 用于检测网页前端、后端、CDN、SaaS、广告营销、统计、登录、支付、网站程序和主题模板线索。",
77
"scripts": {

public/icons/tech/a3.png

1.41 KB
Loading

public/icons/tech/abp.svg

Lines changed: 7 additions & 0 deletions
Loading

public/icons/tech/absorb.svg

Lines changed: 5 additions & 0 deletions
Loading

public/icons/tech/accessibe.svg

Lines changed: 13 additions & 0 deletions
Loading

public/icons/tech/accessibly.svg

Lines changed: 1 addition & 0 deletions
Loading

0 commit comments

Comments
 (0)