Skip to content

Commit cc99609

Browse files
committed
Add pruning mechanism
1 parent 89c94f4 commit cc99609

1 file changed

Lines changed: 68 additions & 0 deletions

File tree

prune_pdfs.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
set -euo pipefail

# prune_pdfs.sh - delete PDFs that no top-level HTML file refers to.
#
# Usage: ./prune_pdfs.sh [--dry-run]
#
#   --dry-run   list the PDFs that would be deleted, but delete nothing
#
# The script works relative to the directory it lives in. It scans every
# regular *.html file in that directory for PDF paths matching PDF_REF_RE and
# then deletes the PDFs under assets/ that were not mentioned.
#
# Safety rules:
#   * Any argument other than --dry-run is rejected, so a typo such as
#     "--dryrun" can never trigger a real deletion.
#   * A PDF is only eligible for deletion when its path could have been
#     matched by PDF_REF_RE. PDFs elsewhere under assets/ (other directory
#     layouts, names containing spaces, ...) cannot be detected as referenced,
#     so they are reported and left untouched.
#   * A grep failure while scanning the HTML aborts the run instead of being
#     treated as "nothing is referenced".

# Extended regex describing the PDF references looked for in the HTML:
# assets/pdf/<name>.pdf or assets/<dir>/pdf/<name>.pdf.
readonly PDF_REF_RE='assets/[A-Za-z0-9_-]+/pdf/[A-Za-z0-9._-]+\.pdf|assets/pdf/[A-Za-z0-9._-]+\.pdf'

# Print a one-line usage summary to stderr.
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}" >&2
}

#######################################
# Prune unreferenced PDFs below <root>/assets.
# Globals:   PDF_REF_RE (read)
# Arguments: $1 - directory holding the *.html files and the assets/ tree
#            $2 - "true" to only report, anything else to really delete
# Outputs:   progress report on stdout, warnings and errors on stderr
# Returns:   0 on success, 1 if scanning or deleting failed
# Note:      changes the current working directory to $1.
#######################################
prune_pdfs() {
  local root=$1 dry_run=$2
  local f rel

  cd -- "$root" || return 1

  # Glob instead of parsing `ls`; keep regular files only, so an unmatched
  # pattern or a directory named *.html is ignored.
  local -a html_files=()
  for f in *.html; do
    if [[ -f $f ]]; then
      html_files+=("$f")
    fi
  done
  if (( ${#html_files[@]} == 0 )); then
    echo "No HTML files found; nothing to prune."
    return 0
  fi

  echo "Scanning HTML files for PDF references..."
  # -a: never let grep classify an HTML file as binary, which would replace
  # the matched paths with a "Binary file matches" notice.
  # grep exit status 1 just means "no match"; anything above is a real error
  # and must not be mistaken for "no PDF is referenced".
  local matches grep_status=0
  matches=$(grep -ahoE -e "$PDF_REF_RE" -- "${html_files[@]}") || grep_status=$?
  if (( grep_status > 1 )); then
    printf 'error: grep failed while scanning HTML files (exit %d)\n' \
      "$grep_status" >&2
    return 1
  fi

  # Set of referenced paths, relative to $root (same form find prints below).
  local -A keep=()
  while IFS= read -r rel; do
    [[ -n $rel ]] || continue
    keep["$rel"]=1
  done <<<"$matches"

  # Classify every PDF under assets/ while reading find's NUL-delimited output.
  local eligible_re="^(${PDF_REF_RE})\$"
  local -a unreferenced=() skipped=()
  local total=0
  if [[ -d assets ]]; then
    while IFS= read -r -d '' f; do
      total=$((total + 1))
      if [[ ! $f =~ $eligible_re ]]; then
        # No HTML reference to this path could ever have been recognised.
        skipped+=("$f")
      elif [[ -z ${keep[$f]+x} ]]; then
        unreferenced+=("$f")
      fi
    done < <(find assets -type f -name '*.pdf' -print0)
  fi

  echo "Total PDFs: $total"
  echo "Referenced PDFs: ${#keep[@]}"

  if (( ${#skipped[@]} > 0 )); then
    echo "Skipping PDFs whose paths cannot be matched against HTML references (${#skipped[@]}):" >&2
    printf " - %s\n" "${skipped[@]}" >&2
  fi

  if (( ${#unreferenced[@]} == 0 )); then
    echo "No unreferenced PDFs to delete."
    return 0
  fi

  echo "Unreferenced PDFs to delete (${#unreferenced[@]}):"
  printf " - %s\n" "${unreferenced[@]}"

  if [[ $dry_run == true ]]; then
    echo "Dry run: not deleting files."
    return 0
  fi

  echo "Deleting unreferenced PDFs..."
  for f in "${unreferenced[@]}"; do
    rm -f -- "$f" || return 1
  done

  echo "Done."
}

#######################################
# Entry point: parse the command line and prune relative to the script's
# own directory.
# Arguments: optional --dry-run
# Returns:   0 on success, 2 on a usage error, 1 on any other failure
#######################################
main() {
  local dry_run=false

  if (( $# > 1 )); then
    usage
    return 2
  fi
  case "${1-}" in
    '') ;;
    --dry-run) dry_run=true ;;
    *)
      printf 'error: unknown argument: %s\n' "$1" >&2
      usage
      return 2
      ;;
  esac

  local repo_root
  repo_root="$(cd -- "$(dirname -- "$0")" && pwd)" || return 1

  prune_pdfs "$repo_root" "$dry_run"
}

main "$@"

0 commit comments

Comments
 (0)