Skip to content

Commit 1bb17a1

Browse files
authored
Merge pull request #697 from apoint123/feat/better-metadata-stripper
✨ feat: 更好的元数据清理
2 parents 88aaa7a + 23ed96e commit 1bb17a1

4 files changed

Lines changed: 768 additions & 100 deletions

File tree

scripts/sort-keywords.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import fs from "node:fs";
2+
import path from "node:path";
3+
4+
/**
 * Maintenance script: normalizes the `keywords` array literal inside
 * `src/assets/data/exclude.ts` (resolved against the current working directory).
 *
 * Steps:
 *   1. Locate the `export const keywords = [ ... ];` declaration.
 *   2. Extract every non-empty quoted string inside the brackets.
 *   3. Drop duplicates (case- and whitespace-insensitive), keeping the longest spelling.
 *   4. Sort with a zh-Hans-CN collator and write the array back, one
 *      double-quoted entry per line.
 *
 * Exits with code 1 if the file or the `keywords` declaration cannot be found.
 */
const TARGET_FILE = path.join(process.cwd(), "src/assets/data/exclude.ts");

/**
 * Collects the text of every quoted string literal found in `arrayBody`.
 *
 * The text is returned exactly as written in the source (escape sequences are
 * NOT decoded), trimmed, with empty entries skipped.
 *
 * @param arrayBody Source text between `[` and `];` of the keywords array.
 * @returns The raw keyword texts in their original order.
 */
function extractKeywords(arrayBody: string): string[] {
  // `\\.` consumes a backslash together with the character it escapes, so an
  // escaped quote (e.g. `\"`) no longer terminates the literal early.
  // Line breaks are excluded so a stray quote cannot swallow following lines.
  const literalPattern = /(['"`])((?:\\.|(?!\1)[^\\\r\n])*)\1/g;
  const items: string[] = [];

  let found: RegExpExecArray | null;
  while ((found = literalPattern.exec(arrayBody)) !== null) {
    const text = (found[2] ?? "").trim();
    if (text) {
      items.push(text);
    }
  }

  return items;
}

/**
 * Removes entries that are equal after lower-casing and stripping all
 * whitespace. When several spellings collide, the longest one wins; the
 * position of the first occurrence is kept.
 *
 * @param items Keywords in source order.
 * @returns De-duplicated keywords.
 */
function dedupeKeywords(items: readonly string[]): string[] {
  const byFingerprint = new Map<string, string>();

  for (const item of items) {
    const fingerprint = item.toLowerCase().replace(/\s+/g, "");
    const existing = byFingerprint.get(fingerprint);

    if (existing === undefined || item.length > existing.length) {
      byFingerprint.set(fingerprint, item);
    }
  }

  return Array.from(byFingerprint.values());
}

/**
 * Wraps raw literal text in double quotes so the result is a valid string
 * literal regardless of which quote style the entry originally used.
 *
 * Existing escape pairs (`\"`, `\\`, `\n`, ...) are left untouched; only bare
 * double quotes and a lone trailing backslash are escaped.
 *
 * @param raw Literal text as it appeared between the original quotes.
 * @returns A double-quoted string literal.
 */
function toDoubleQuotedLiteral(raw: string): string {
  const body = raw.replace(/\\[\s\S]|["\\]/g, (token) => {
    if (token === '"') {
      return '\\"';
    }
    if (token === "\\") {
      return "\\\\";
    }
    return token;
  });

  return `"${body}"`;
}

if (!fs.existsSync(TARGET_FILE)) {
  console.error("❌ 找不到 src/assets/data/exclude.ts,确保你在项目根目录下运行此脚本");
  process.exit(1);
}

const fileContent = fs.readFileSync(TARGET_FILE, "utf-8");

// Capture groups: (1) `export const keywords = [`, (2) the array body, (3) the closing `];`.
const keywordsMatch = fileContent.match(/(export\s+const\s+keywords\s*=\s*\[)([\s\S]*?)(\];)/);

if (!keywordsMatch) {
  console.error("❌ 找不到 `export const keywords = [...]` 结构");
  process.exit(1);
}

const [fullMatch, prefix = "", rawContent = "", suffix = ""] = keywordsMatch;

const rawItems = extractKeywords(rawContent);

console.log(`✅ 找到 ${rawItems.length} 个关键词`);

const uniqueItems = dedupeKeywords(rawItems);
// Every input entry that did not survive de-duplication counts as removed.
const duplicatesRemoved = rawItems.length - uniqueItems.length;

console.log(`🧹 去重完毕,关键词有 ${uniqueItems.length},移除了 ${duplicatesRemoved}`);

const collator = new Intl.Collator("zh-Hans-CN", { sensitivity: "accent" });

uniqueItems.sort((a, b) => collator.compare(a, b));

const newArrayContent = uniqueItems.map((item) => ` ${toDoubleQuotedLiteral(item)},`).join("\n");

const newContentBlock = `\n${newArrayContent}\n`;

// A replacer function is used on purpose: a plain replacement string would have
// `$&`, `$1`, `$$`, ... inside keywords interpreted as replacement patterns.
const newFileContent = fileContent.replace(fullMatch, () => `${prefix}${newContentBlock}${suffix}`);

fs.writeFileSync(TARGET_FILE, newFileContent, "utf-8");

0 commit comments

Comments
 (0)