#!/usr/bin/env bash
#
# prune_pdfs.sh - delete PDFs under assets/**/pdf/ that are not referenced by
# any top-level *.html file sitting next to this script.
#
# Usage: ./prune_pdfs.sh [--dry-run]
#   --dry-run   list the PDFs that would be deleted, but delete nothing
#
# Exit status: 0 on success, 1 if scanning or deleting fails, 2 on bad usage.
set -euo pipefail

# ERE describing one prunable PDF path, relative to the site root:
# assets/<zero or more dirs>/pdf/<name>.pdf. The same expression is used both
# to extract references from HTML and to decide which files on disk are
# eligible for deletion, so a file whose path could never be recognised as a
# reference is never deleted.
readonly PDF_REF_RE='assets/([A-Za-z0-9_-]+/)*pdf/[A-Za-z0-9._-]+\.pdf'

# Print the one-line usage summary to stdout.
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
}

#######################################
# Report, and unless dry-running delete, the unreferenced PDFs below
# <root>/assets.
# Arguments: $1 - site root holding the *.html files and the assets/ tree
#            $2 - "true" to only list the files, anything else to delete them
# Outputs:   progress and summary on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the root cannot be entered, the HTML scan
#            fails, or a file cannot be removed
#######################################
prune_pdfs() (
  # The body is a subshell so the cd below never leaks into the caller.
  local root=$1 dry_run=$2

  cd -- "$root" || {
    printf 'prune_pdfs: cannot enter %s\n' "$root" >&2
    return 1
  }

  # Glob instead of parsing `ls`; an unmatched pattern stays literal and is
  # dropped by the -f test.
  local -a html_files=()
  local page
  for page in *.html; do
    if [[ -f $page ]]; then
      html_files+=("$page")
    fi
  done
  if ((${#html_files[@]} == 0)); then
    echo "No HTML files found; nothing to prune."
    return 0
  fi

  echo "Scanning HTML files for PDF references..."
  # -a keeps grep from classifying an HTML file with stray bytes as binary,
  # which would silently drop every reference it contains.
  local refs grep_rc=0
  refs=$(grep -ahoE -e "$PDF_REF_RE" -- "${html_files[@]}") || grep_rc=$?
  # Exit 1 only means "no references found". Anything higher is a real error,
  # and continuing with an incomplete keep-set would delete PDFs still in use.
  if ((grep_rc > 1)); then
    printf 'prune_pdfs: scanning HTML files failed (grep exit %d); aborting.\n' \
      "$grep_rc" >&2
    return 1
  fi

  # Set of referenced paths, keyed by path relative to the root.
  local -A keep=()
  local rel
  while IFS= read -r rel; do
    if [[ -n $rel ]]; then
      keep[$rel]=1
    fi
  done <<<"$refs"

  # Classify every PDF under assets/. NUL-delimited so unusual file names
  # cannot be split or merged.
  local -a unreferenced=()
  local total=0 skipped=0 pdf
  if [[ -d assets ]]; then
    while IFS= read -r -d '' pdf; do
      total=$((total + 1))
      if [[ ! $pdf =~ ^${PDF_REF_RE}$ ]]; then
        # Not in a pdf/ directory, or the name uses characters the reference
        # pattern cannot match: leave the file alone.
        skipped=$((skipped + 1))
      elif [[ -z ${keep[$pdf]+x} ]]; then
        unreferenced+=("$pdf")
      fi
    done < <(find assets -type f -name '*.pdf' -print0)
  fi

  echo "Total PDFs: $total"
  echo "Referenced PDFs: ${#keep[@]}"
  if ((skipped > 0)); then
    echo "Skipped PDFs (outside assets/**/pdf/ or with unsupported names): $skipped"
  fi
  if ((${#unreferenced[@]} == 0)); then
    echo "No unreferenced PDFs to delete."
    return 0
  fi

  echo "Unreferenced PDFs to delete (${#unreferenced[@]}):"
  printf ' - %s\n' "${unreferenced[@]}"

  if [[ $dry_run == true ]]; then
    echo "Dry run: not deleting files."
    return 0
  fi

  echo "Deleting unreferenced PDFs..."
  for pdf in "${unreferenced[@]}"; do
    rm -f -- "$pdf" || {
      printf 'prune_pdfs: failed to delete %s\n' "$pdf" >&2
      return 1
    }
  done

  echo "Done."
)

#######################################
# Parse the command line and prune relative to this script's directory.
# Arguments: "$@" - the script's arguments
# Returns:   2 on an unknown argument, otherwise the status of prune_pdfs
#######################################
main() {
  local dry_run=false arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        dry_run=true
        ;;
      -h | --help)
        usage
        return 0
        ;;
      *)
        # A mistyped flag must never fall through to a real deletion.
        printf '%s: unknown argument: %s\n' "${0##*/}" "$arg" >&2
        usage >&2
        return 2
        ;;
    esac
  done

  # CDPATH is cleared so a relative script directory is not looked up elsewhere.
  local script_dir
  script_dir=$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd) ||
    return 1

  prune_pdfs "$script_dir" "$dry_run"
}

main "$@"