-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck-ascii.mjs
More file actions
103 lines (92 loc) · 3.04 KB
/
Copy pathcheck-ascii.mjs
File metadata and controls
103 lines (92 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env node
// check-ascii.mjs
//
// Lightweight guard: fail if any tracked source/docs/scripts file contains a
// non-ASCII byte. Keeps repo-maintenance text plain ASCII so we never have to
// do "mojibake surgery" on invisible Unicode again.
//
// Scans a small, fixed set of text extensions under the repo root (taken to be
// the current working directory), skipping the usual generated/dependency
// directories, the generated docs/ai/visualize/ output (that HTML embeds
// scanned target data, which may legitimately be non-ASCII), and the
// fixtures/adversarial-repo/ tree.
//
// Usage: node scripts/check-ascii.mjs (exit 0 = clean, 1 = violations found)
import { readdirSync, statSync, readFileSync } from "node:fs";
import { join, extname, relative, sep, posix } from "node:path";
// Directory the scan starts from. Reported paths and the path-based skip
// rules are evaluated relative to it.
const ROOT = process.cwd();

// Extensions (lowercase, leading dot included) of the files that get scanned.
// Files with any other extension are ignored.
const TEXT_EXT = new Set([
  ".md",
  ".mjs",
  ".cjs",
  ".js",
  ".json",
  ".html",
  ".css",
  ".sh",
  ".yml",
  ".yaml",
  ".txt",
]);

// Directory names the walker never descends into, wherever they appear.
const SKIP_DIRS = new Set([
  ".git",
  "node_modules",
  "vendor",
  "dist",
  "build",
  ".next",
  ".svelte-kit",
  "storage",
  "coverage",
]);
/**
 * Decide whether a file is exempt from the ASCII check.
 *
 * Generated visualizer artifacts are skipped: they embed scanned target
 * content, which may legitimately be non-ASCII. Only the visualize/
 * sub-namespace is skipped; root docs/ai/ orientation files (from
 * codebase-orient-skill) are tracked text and should be checked. Everything
 * under the top-level fixtures/adversarial-repo/ directory is skipped too.
 *
 * @param {string} relPath - Path relative to the scan root, written with the
 *   platform's native separator.
 * @returns {boolean} true when the path lies inside a docs/ai/visualize/
 *   directory (at any depth) or under top-level fixtures/adversarial-repo/.
 */
function isSkippedPath(relPath) {
  // Normalize to forward slashes so the checks below also work on Windows.
  const normalized = relPath.split(sep).join(posix.sep);
  // Prefix a separator so that "docs" must be a whole path segment; a sibling
  // such as "olddocs/ai/visualize/" is not generated output and stays checked.
  const anchored = posix.sep + normalized;
  return (
    anchored.includes("/docs/ai/visualize/") ||
    normalized.startsWith("fixtures/adversarial-repo/")
  );
}
/**
 * Recursively gather the files under `dir` that should be scanned.
 *
 * Directories whose name is in SKIP_DIRS are not descended into. A regular
 * file is kept when its lowercased extension is in TEXT_EXT and
 * isSkippedPath() does not exempt its path relative to ROOT. Entries that are
 * neither directories nor regular files are ignored.
 *
 * @param {string} dir - Directory to walk.
 * @param {{abs: string, rel: string}[]} acc - Output array, appended to in
 *   place. `abs` is join(dir, name); `rel` is that path relative to ROOT.
 * @returns {void}
 */
function collect(dir, acc) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    // Stay best-effort (an unreadable directory does not abort the run), but
    // say so on stderr: silently dropping a subtree would let the check pass
    // without ever having looked at it.
    console.warn(`check:ascii: skipping unreadable directory ${dir}: ${err.message}`);
    return;
  }
  for (const entry of entries) {
    const abs = join(dir, entry.name);
    if (entry.isDirectory()) {
      if (!SKIP_DIRS.has(entry.name)) collect(abs, acc);
      continue;
    }
    if (!entry.isFile()) continue;
    if (!TEXT_EXT.has(extname(entry.name).toLowerCase())) continue;
    const rel = relative(ROOT, abs);
    if (isSkippedPath(rel)) continue;
    acc.push({ abs, rel });
  }
}
/**
 * Find the lines of one file that contain a non-ASCII character.
 *
 * The file is decoded as UTF-8 and split on "\n". Only the first offending
 * character of each line is reported.
 *
 * @param {{abs: string}} file - Entry whose `abs` path is read.
 * @returns {{line: number, col: number, code: string}[]} One item per
 *   offending line: 1-based line number, 1-based column counted in UTF-16
 *   code units, and the code point formatted as "U+XXXX" (uppercase hex,
 *   zero-padded to at least four digits).
 */
function scanFile(file) {
  // Any UTF-16 code unit above 0x7F, surrogate halves included.
  const nonAscii = /[\u0080-\uffff]/;
  const hits = [];
  readFileSync(file.abs, "utf8")
    .split("\n")
    .forEach((text, idx) => {
      const at = text.search(nonAscii);
      if (at === -1) return;
      // codePointAt joins a surrogate pair, so astral characters are
      // reported as a single code point.
      const hex = text.codePointAt(at).toString(16).toUpperCase();
      hits.push({ line: idx + 1, col: at + 1, code: "U+" + hex.padStart(4, "0") });
    });
  return hits;
}
/**
 * Entry point: collect the files under ROOT, scan each one, and print one
 * "path:line:col non-ASCII U+XXXX" line to stdout per reported hit.
 *
 * With no hits, a success summary goes to stdout. Otherwise a failure summary
 * with the number of reported hits goes to stderr and the process exits with
 * status 1.
 */
function main() {
  const files = [];
  collect(ROOT, files);

  let total = 0;
  for (const file of files) {
    const hits = scanFile(file);
    if (hits.length === 0) continue;
    total += hits.length;
    // Always print forward-slash paths, whatever the platform separator is.
    const relPosix = file.rel.split(sep).join(posix.sep);
    hits.forEach(({ line, col, code }) => {
      console.log(`${relPosix}:${line}:${col} non-ASCII ${code}`);
    });
  }

  if (total === 0) {
    console.log(`check:ascii OK - ${files.length} files scanned, all ASCII.`);
    return;
  }
  console.error(`\ncheck:ascii FAILED - ${total} non-ASCII occurrence(s) found.`);
  process.exit(1);
}

main();