Skip to content

Commit 9f1828b

Browse files
ndbroadbentclaude
andcommitted
Fix fingerprint: remove timestamps from hash, add forbidden names check
- Remove timestamps from fingerprint hash entirely (WhatsApp exports are NOT idempotent - the same message can have ±1-2 seconds of drift)
- Identity now comes from sender + content + order only
- Add lefthook check for forbidden personal names in source code
- Remove orphaned clustering snapshot with personal data
- Anonymize test data with generic names (John Smith, Jane Smith)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent be2d313 commit 9f1828b

7 files changed

Lines changed: 169 additions & 1595 deletions

File tree

lefthook.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ pre-commit:
2929
file-length:
3030
run: ./scripts/check_file_length.sh {staged_files}
3131

32+
forbidden-names:
33+
run: ./scripts/check_forbidden_names.sh {staged_files}
34+
3235
biome-ignores:
3336
glob: "*.ts"
3437
run: |

scripts/check_forbidden_names.sh

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/bin/bash
# Check for forbidden personal names in source code.
# These should not appear in production code (use generic names in tests).
#
# Usage: check_forbidden_names.sh [file...]
#   With arguments, only the given files are checked; otherwise the script
#   scans the source tree itself.
# Exit status: 0 when no forbidden name is found, 1 otherwise.

# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u),
# and on a failure in any stage of a pipeline (pipefail).
set -euo pipefail

# Overall result of the run; set to 1 as soon as any file contains a
# forbidden name. Intentionally mutable.
EXIT_CODE=0

# Colors for output (ANSI escape sequences, interpreted at print time).
RED='\033[0;31m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color
readonly RED GREEN NC

# Forbidden names; each entry is searched for in every checked file with grep.
FORBIDDEN_PATTERNS=(
  "Nathan Broadbent"
  "Masha Broadbent"
)
readonly FORBIDDEN_PATTERNS

# Allowlisted files (relative to repo root) that may contain the names above.
ALLOWLIST=(
  "scripts/check_forbidden_names.sh" # This script itself
)
readonly ALLOWLIST
24+
25+
#######################################
# Report whether a path is exempt from the forbidden-name scan.
# Globals:   ALLOWLIST (read) - exempt paths, relative to the repo root
# Arguments: $1 - path of the file being checked
# Returns:   0 if the path equals an allowlist entry or ends with
#            "/<entry>"; 1 otherwise
#######################################
is_allowlisted() {
  local file="$1"
  local allowed # keep the loop variable out of the global scope

  for allowed in "${ALLOWLIST[@]}"; do
    # Match only on a path-component boundary so that absolute or "./"
    # prefixed paths still match, while look-alike paths such as
    # "evil_scripts/check_forbidden_names.sh" are not silently exempted.
    if [[ "$file" == "$allowed" || "$file" == */"$allowed" ]]; then
      return 0
    fi
  done

  return 1
}
34+
35+
#######################################
# Scan one file for forbidden names and report every match.
# Globals:   FORBIDDEN_PATTERNS (read) - grep patterns to search for
#            RED, NC (read)            - color escape sequences for output
#            EXIT_CODE (written)       - set to 1 when a pattern is found
# Arguments: $1 - path of the file to check
# Outputs:   one report (three lines) on stdout per matching pattern
# Returns:   0 always; findings are signalled through EXIT_CODE
#######################################
check_file() {
  local file="$1"
  local pattern

  # Skip if file doesn't exist (or is not a regular file).
  if [[ ! -f "$file" ]]; then
    return 0
  fi

  # Skip binary and non-source files.
  case "$file" in
    *.gz|*.zip|*.png|*.jpg|*.jpeg|*.gif|*.ico|*.woff|*.woff2|*.ttf|*.eot|*.pdf)
      return 0
      ;;
    *node_modules*|*dist/*|*.lock)
      return 0
      ;;
  esac

  # Skip allowlisted files.
  if is_allowlisted "$file"; then
    return 0
  fi

  for pattern in "${FORBIDDEN_PATTERNS[@]}"; do
    # -i: the patterns are meant to match regardless of case, so that
    #     "nathan broadbent" is caught as well as "Nathan Broadbent".
    # --: a pattern or path starting with '-' is never parsed as an option.
    # grep errors (e.g. unreadable file) are deliberately ignored: best effort.
    if grep -qi -- "$pattern" "$file" 2>/dev/null; then
      # printf keeps backslashes in the file name or pattern literal; only
      # the color variables are expanded with %b.
      printf "%b%s: contains forbidden name '%s'%b\n" "$RED" "$file" "$pattern" "$NC"
      echo "  Use generic names like 'Alice Smith', 'Bob Jones' instead (first + last required)"
      echo "  Also review the surrounding content - anonymize any sensitive or personal chat data"
      EXIT_CODE=1
    fi
  done

  return 0
}
65+
66+
# If specific files are passed, check only those
67+
if [[ $# -gt 0 ]]; then
68+
for file in "$@"; do
69+
check_file "$file"
70+
done
71+
else
72+
# Check all source files
73+
while IFS= read -r -d '' file; do
74+
check_file "$file"
75+
done < <(find src -type f \( -name "*.ts" -o -name "*.js" -o -name "*.json" -o -name "*.md" \) -print0 2>/dev/null || true)
76+
fi
77+
78+
if [[ $EXIT_CODE -eq 0 ]]; then
79+
echo -e "${GREEN}✅ No forbidden names found${NC}"
80+
fi
81+
82+
exit $EXIT_CODE

0 commit comments

Comments
 (0)