Skip to content

Commit 2827b6d

Browse files
committed
fix: 跨域 API/iframe 响应头不计入本站技术栈
新增可注册域(eTLD+1)判定,公共 CDN、三方服务的 API/iframe 响应头检测结果不再误算进本站技术栈;前后端分离的同站子域仍照常计入。 将版本号提升到 1.3.72。
1 parent a392783 commit 2827b6d

3 files changed

Lines changed: 101 additions & 4 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "stackprism",
33
"private": true,
4-
"version": "1.3.71",
4+
"version": "1.3.72",
55
"type": "module",
66
"description": "StackPrism 用于检测网页前端、后端、CDN、SaaS、广告营销、统计、登录、支付、网站程序和主题模板线索。",
77
"scripts": {

src/background/popup-cache.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
import { loadDetectorSettings, loadTechRules } from './detector-settings'
1515
import { categoryIndex, confidenceRank } from '@/utils/category-order'
1616
import { cleanTechnologyUrl } from '@/utils/url'
17+
import { isSameSite } from '@/utils/domain'
1718
import { cleanStringArray } from '@/utils/normalize-settings'
1819
import { normalizeTechName } from '@/utils/tech-name'
1920
import { checkPageSupport } from '@/utils/page-support'
@@ -241,10 +242,15 @@ const mergeDisplayTechnologyRecords = (items: any[]) => {
241242

/**
 * Gathers the raw technology records that describe the inspected page itself.
 *
 * Records attached to the page, the main document and the bundle are always
 * collected. Records attached to API responses and iframes are collected only
 * when `isSameSite` reports that their URL belongs to the same registrable
 * domain as the page, so that infrastructure owned by third parties (public
 * CDNs, external services) is not attributed to this site.
 *
 * @param data Detection payload; only the `page`, `dynamic`, `main`, `apis`,
 *   `frames` and `bundle` properties are read here.
 * @returns A new array filled through `addAllTechnologies`.
 */
const collectRawReferenceTechnologies = (data: any) => {
  const items: any[] = []
  // First non-empty URL wins; an empty string makes isSameSite skip filtering.
  const pageUrl = data.page?.url || data.dynamic?.url || data.main?.url || ''
  addAllTechnologies(items, data.page?.technologies)
  addAllTechnologies(items, data.main?.technologies)
  for (const api of data.apis || []) {
    if (isSameSite(api.url, pageUrl)) addAllTechnologies(items, api.technologies)
  }
  for (const frame of data.frames || []) {
    if (isSameSite(frame.url, pageUrl)) addAllTechnologies(items, frame.technologies)
  }
  addAllTechnologies(items, data.bundle?.technologies)
  return items
}
@@ -349,10 +355,14 @@ const shortHostFromUrl = (url: string): string => {
349355

350356
const buildDisplayTechnologies = (data: any, settings: any, suppressMap: Record<string, string[]>) => {
351357
const all: any[] = []
358+
const pageUrl = data.page?.url || data.dynamic?.url || data.main?.url || ''
352359
addAllTechnologies(all, data.page?.technologies)
353360
addAllTechnologies(all, data.main?.technologies)
354361
addAllTechnologies(all, collectHttpProtocolTechs(data))
362+
// 跨可注册域的 API / iframe 响应头只代表第三方(公共 CDN、三方服务)自身的基建,
363+
// 不算本站技术栈;同站子域(含前后端分离的 api.* 子域)仍计入。
355364
for (const api of data.apis || []) {
365+
if (!isSameSite(api.url, pageUrl)) continue
356366
addAllTechnologies(
357367
all,
358368
(api.technologies || []).map((tech: any) => ({
@@ -362,6 +372,7 @@ const buildDisplayTechnologies = (data: any, settings: any, suppressMap: Record<
362372
)
363373
}
364374
for (const frame of data.frames || []) {
375+
if (!isSameSite(frame.url, pageUrl)) continue
365376
addAllTechnologies(
366377
all,
367378
(frame.technologies || []).map((tech: any) => ({
@@ -384,7 +395,6 @@ const buildDisplayTechnologies = (data: any, settings: any, suppressMap: Record<
384395
source: tech.source || 'JS 版权注释'
385396
}))
386397
)
387-
const pageUrl = data.page?.url || data.dynamic?.url || data.main?.url || ''
388398
return filterTechnologiesBySettings(
389399
suppressSelfHostTechs(
390400
suppressGenericFrontendLibDuplicates(suppressGenericCdnFallbacks(mergeDisplayTechnologyRecords(all))),

src/utils/domain.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
/**
 * Multi-label public suffixes (eTLDs), used to fold a host name such as
 * `a.example.co.uk` into its registrable domain `example.co.uk`.
 *
 * Only common country/region second-level suffixes are listed. A suffix that
 * is missing from this table falls back to the "last two labels" rule, so the
 * worst case is that two unrelated sites under that suffix are treated as the
 * same site. When a listed suffix matches, the registrable domain is the last
 * three labels.
 */
const MULTI_LABEL_SUFFIXES: ReadonlySet<string> = new Set(
  `co.uk org.uk me.uk ltd.uk plc.uk net.uk sch.uk ac.uk gov.uk mod.uk nhs.uk police.uk
  com.cn net.cn org.cn gov.cn edu.cn ac.cn mil.cn ah.cn bj.cn cq.cn fj.cn gd.cn gs.cn gx.cn
  gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn ln.cn nm.cn nx.cn qh.cn
  sc.cn sd.cn sh.cn sn.cn sx.cn tj.cn xj.cn xz.cn yn.cn zj.cn mo.cn
  co.jp or.jp ne.jp ac.jp ad.jp ed.jp go.jp gr.jp lg.jp
  co.kr or.kr ne.kr re.kr pe.kr go.kr ac.kr hs.kr ms.kr es.kr sc.kr kg.kr
  com.au net.au org.au edu.au gov.au asn.au id.au
  com.br net.br org.br gov.br edu.br art.br mil.br
  com.hk net.hk org.hk edu.hk gov.hk idv.hk
  com.tw net.tw org.tw edu.tw gov.tw idv.tw game.tw ebiz.tw club.tw
  co.in net.in org.in gen.in firm.in ind.in gov.in ac.in edu.in res.in
  co.za net.za org.za gov.za ac.za web.za
  com.sg net.sg org.sg edu.sg gov.sg per.sg
  com.ru net.ru org.ru msk.ru spb.ru edu.ru gov.ru int.ru ac.ru
  com.mx net.mx org.mx edu.mx gob.mx
  com.tr net.tr org.tr edu.tr gov.tr bel.tr mil.tr k12.tr biz.tr info.tr name.tr
  com.ua net.ua org.ua edu.ua gov.ua in.ua kiev.ua
  com.vn net.vn org.vn edu.vn gov.vn biz.vn info.vn name.vn pro.vn
  com.my net.my org.my edu.my gov.my mil.my name.my
  com.ph net.ph org.ph edu.ph gov.ph
  co.id net.id or.id web.id ac.id sch.id go.id my.id biz.id desa.id
  co.th net.th or.th ac.th go.th in.th mi.th
  co.il net.il org.il ac.il gov.il k12.il muni.il idf.il
  co.nz net.nz org.nz govt.nz ac.nz geek.nz school.nz kiwi.nz gen.nz
  com.pl net.pl org.pl edu.pl gov.pl biz.pl info.pl waw.pl
  com.ar net.ar org.ar edu.ar gob.ar gov.ar int.ar mil.ar tur.ar
  com.co net.co org.co edu.co gov.co mil.co
  com.pk net.pk org.pk edu.pk gov.pk gob.pk
  com.sa net.sa org.sa edu.sa gov.sa med.sa pub.sa sch.sa
  com.eg net.eg org.eg edu.eg gov.eg eun.eg sci.eg
  com.ng net.ng org.ng edu.ng gov.ng
  co.ke ne.ke or.ke ac.ke go.ke sc.ke me.ke info.ke
  com.bd net.bd org.bd edu.bd gov.bd ac.bd
  com.lk net.lk org.lk edu.lk gov.lk ac.lk sch.lk
  com.np net.np org.np edu.np gov.np
  co.ir net.ir org.ir ac.ir gov.ir id.ir sch.ir
  co.ae net.ae org.ae ac.ae gov.ae sch.ae mil.ae
  com.qa net.qa org.qa edu.qa gov.qa mil.qa sch.qa
  com.jo net.jo org.jo edu.jo gov.jo mil.jo
  com.pt edu.pt gov.pt org.pt nome.pt int.pt net.pt publ.pt
  com.gr edu.gr net.gr org.gr gov.gr
  com.es org.es gob.es edu.es nom.es
  asso.fr com.fr gouv.fr nom.fr prd.fr tm.fr
  gov.it edu.it gov.ie`
    .split(/\s+/)
    .filter(Boolean)
)

/** Dotted-quad shape check; octet ranges are not validated. */
const IPV4_RE = /^\d{1,3}(?:\.\d{1,3}){3}$/

/**
 * Reports whether `host` looks like an IP literal: either a dotted quad, or
 * any string containing `:` (which covers bracketed IPv6 host names).
 */
const isIpHost = (host: string): boolean => IPV4_RE.test(host) || host.includes(':')

/**
 * Folds a host name into its registrable domain (eTLD+1).
 *
 * The input is stringified, lower-cased and stripped of one trailing dot.
 * Empty input, IP literals and host names with at most two labels are
 * returned as normalised, without further folding.
 *
 * @param hostname Host name to fold; `null`/`undefined` are treated as `''`.
 * @returns The last three labels when the last two form a listed multi-label
 *   suffix, otherwise the last two labels.
 */
export const getRegistrableDomain = (hostname: unknown): string => {
  const host = String(hostname ?? '')
    .toLowerCase()
    .replace(/\.$/, '')
  if (!host || isIpHost(host)) return host
  const labels = host.split('.')
  if (labels.length <= 2) return host
  const last2 = labels.slice(-2).join('.')
  if (MULTI_LABEL_SUFFIXES.has(last2)) return labels.slice(-3).join('.')
  return last2
}

/**
 * Parses `rawUrl` and returns the registrable domain of its host name.
 * Returns `''` when the value cannot be parsed as an absolute URL.
 */
const registrableDomainFromUrl = (rawUrl: unknown): string => {
  try {
    return getRegistrableDomain(new URL(String(rawUrl ?? '')).hostname)
  } catch {
    return ''
  }
}

/**
 * Decides whether a request record's URL belongs to the same registrable
 * domain as the page.
 *
 * @param recordUrl URL of the API / iframe record being checked.
 * @param pageUrl URL of the inspected page.
 * @returns `true` when both registrable domains are equal. When the page
 *   domain cannot be determined the result is `true` (nothing to compare
 *   against, so nothing is filtered out). When only the record domain cannot
 *   be determined the result is `false` (treated as third party, so external
 *   signals are not attributed to this site).
 */
export const isSameSite = (recordUrl: unknown, pageUrl: unknown): boolean => {
  const pageDomain = registrableDomainFromUrl(pageUrl)
  if (!pageDomain) return true
  const recordDomain = registrableDomainFromUrl(recordUrl)
  if (!recordDomain) return false
  return recordDomain === pageDomain
}

0 commit comments

Comments
 (0)