Skip to content

Commit 27676e8

Browse files
removing unused photos
1 parent 986a74e commit 27676e8

86 files changed

Lines changed: 230 additions & 17 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.
Lines changed: 230 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,230 @@
1+
#!/usr/bin/env python3
2+
"""Find image files in static/ that are not referenced in the repository."""
3+
4+
from __future__ import annotations
5+
6+
import argparse
7+
import os
8+
import re
9+
from pathlib import Path
10+
from typing import Iterable
11+
12+
13+
# Lowercase file extensions (leading dot included) that are treated as images.
IMAGE_SUFFIXES: set[str] = {
    # raster formats
    ".avif", ".bmp", ".gif", ".jpeg", ".jpg", ".png", ".tif", ".tiff", ".webp",
    # icons and vector graphics
    ".ico", ".svg",
}
26+
27+
# Lowercase file extensions (leading dot included) of the text files that are
# read when looking for image references.
TEXT_SUFFIXES: set[str] = {
    # stylesheets
    ".css", ".scss",
    # markup and documentation
    ".html", ".md", ".mdx",
    # scripts
    ".js", ".jsx", ".ts", ".tsx",
    # data / configuration
    ".json", ".yaml", ".yml",
}
41+
42+
# Directory names that are never descended into while walking source trees.
EXCLUDED_DIRS: set[str] = {
    ".codex", ".docusaurus", ".git", ".tickets",
    "build", "node_modules", "static",
}
51+
52+
# Paths (relative to the repository root) that are always scanned for image
# references: first the content/source directories, then the site config files.
DEFAULT_REFERENCE_SOURCES: tuple[str, ...] = (
    "user-guide", "developer-guide", "experiments", "src",
    "docusaurus.config.js", "sidebars.js",
)
60+
61+
# Matches a single-line string delimited by ', " or ` whose content ends in an
# image extension. The "path" group holds the text between the quotes; the
# closing delimiter must be the same character as the opening one.
IMAGE_TOKEN_RE = re.compile(
    r"""(?P<quote>['"`])(?P<path>[^'"`\n]+?\.(?:avif|bmp|gif|ico|jpe?g|png|svg|tiff?|webp))(?P=quote)""",
    re.IGNORECASE,
)
# Matches an unquoted "/img/..." path ending in an image extension. The path
# may not contain whitespace, quotes, backticks, ")", "]" or ">".
IMG_WEB_PATH_RE = re.compile(
    r"""/img/[^\s"'`)\]>]+?\.(?:avif|bmp|gif|ico|jpe?g|png|svg|tiff?|webp)""",
    re.IGNORECASE,
)
# Matches "static/img/..." paths ending in an image extension, optionally
# preceded by "@site/" and/or a leading "/". Same character restrictions as
# IMG_WEB_PATH_RE.
STATIC_PATH_RE = re.compile(
    r"""(?:@site/)?/?static/img/[^\s"'`)\]>]+?\.(?:avif|bmp|gif|ico|jpe?g|png|svg|tiff?|webp)""",
    re.IGNORECASE,
)
73+
74+
75+
def strip_url_suffix(path: str) -> str:
    """Return *path* with any query string and fragment removed.

    Everything from the first ``?`` onwards is dropped, then everything from
    the first ``#`` in what remains.
    """
    before_query, _, _ = path.partition("?")
    before_fragment, _, _ = before_query.partition("#")
    return before_fragment
77+
78+
79+
def normalize_reference_to_static_path(reference: str) -> Path | None:
    """Translate a raw image reference into a path relative to the static dir.

    The reference is stripped of surrounding whitespace and of any query
    string or fragment. Prefix matching is case-insensitive, but the returned
    path keeps the original casing.

    Returns:
        A relative ``Path`` for references that start with ``/img/``,
        ``img/``, ``@site/static/``, ``/static/`` or ``static/``, and for bare
        relative paths that contain a ``/`` and end in an image suffix.
        ``None`` for external URLs, ``data:``/``mailto:`` URIs, fragments,
        other rooted paths, and anything else.
    """
    cleaned = strip_url_suffix(reference.strip())
    lowered = cleaned.lower()

    # (prefix, whether the remainder lives under img/). Checked in order, so
    # the specific prefixes win before the generic "/" rejection below.
    known_prefixes = (
        ("/img/", True),
        ("img/", True),
        ("@site/static/", False),
        ("/static/", False),
        ("static/", False),
    )
    for prefix, under_img in known_prefixes:
        if lowered.startswith(prefix):
            remainder = cleaned[len(prefix) :]
            if under_img:
                return Path("img") / remainder
            return Path(remainder)

    not_local = ("http://", "https://", "data:", "mailto:", "#", "/")
    if lowered.startswith(not_local):
        return None

    # Dynamic React requires like /static/img/${imageUrl} often pass "misc/foo.svg".
    if "/" not in cleaned:
        return None
    if Path(cleaned).suffix.lower() not in IMAGE_SUFFIXES:
        return None
    return Path("img") / cleaned
101+
102+
103+
def extract_reference_tokens(text: str) -> set[str]:
    """Collect every image-like reference found in *text*.

    Combines the full matches of ``IMG_WEB_PATH_RE`` and ``STATIC_PATH_RE``
    with the ``path`` group of ``IMAGE_TOKEN_RE`` matches, then removes query
    strings and fragments from each token. Duplicates collapse in the set.
    """
    raw_tokens: list[str] = []
    for pattern in (IMG_WEB_PATH_RE, STATIC_PATH_RE):
        raw_tokens.extend(found.group(0) for found in pattern.finditer(text))
    raw_tokens.extend(found.group("path") for found in IMAGE_TOKEN_RE.finditer(text))
    return {strip_url_suffix(raw) for raw in raw_tokens}
109+
110+
111+
def iter_repo_text_files(repo_root: Path, sources: Iterable[str]) -> Iterable[Path]:
    """Yield the text files to scan under each entry of *sources*.

    Each source is resolved against *repo_root*. A source that is a file is
    yielded only if its suffix is in ``TEXT_SUFFIXES``; a missing source is
    skipped; anything else is walked recursively. During the walk, directories
    named in ``EXCLUDED_DIRS`` or starting with ``.`` are not entered, and
    files starting with ``.`` are ignored.
    """
    for source in sources:
        start = (repo_root / source).resolve()

        if start.is_file():
            if start.suffix.lower() in TEXT_SUFFIXES:
                yield start
            continue
        if not start.exists():
            continue

        for current_dir, subdirs, filenames in os.walk(start):
            # Prune in place so os.walk does not descend into skipped directories.
            subdirs[:] = [
                subdir
                for subdir in subdirs
                if not (subdir in EXCLUDED_DIRS or subdir.startswith("."))
            ]
            base = Path(current_dir)

            visible = (base / name for name in filenames if not name.startswith("."))
            for candidate in visible:
                if candidate.suffix.lower() in TEXT_SUFFIXES:
                    yield candidate
132+
133+
134+
def iter_static_images(static_dir: Path) -> Iterable[Path]:
    """Yield every image file found anywhere below *static_dir*.

    An entry qualifies when it is a regular file, its name does not start
    with ``.``, and its lowercased suffix is in ``IMAGE_SUFFIXES``.
    """
    for entry in static_dir.rglob("*"):
        is_image_file = (
            entry.is_file()
            and not entry.name.startswith(".")
            and entry.suffix.lower() in IMAGE_SUFFIXES
        )
        if is_image_file:
            yield entry
143+
144+
145+
def find_used_static_images(
    repo_root: Path, static_dir: Path, sources: Iterable[str]
) -> tuple[set[Path], set[str]]:
    """Scan the source files and work out which static files they reference.

    Args:
        repo_root: Repository root that *sources* are resolved against.
        static_dir: Directory that normalized references are joined onto.
        sources: Files or directories to scan for references.

    Returns:
        A pair ``(used, missing_refs)``: the resolved paths of referenced
        files that exist, and the raw tokens that normalize to a path under
        ``img/`` but do not exist on disk.
    """
    referenced: set[Path] = set()
    dangling: set[str] = set()

    for text_file in iter_repo_text_files(repo_root, sources):
        # Undecodable bytes are dropped rather than aborting the scan.
        contents = text_file.read_text(encoding="utf-8", errors="ignore")

        for token in extract_reference_tokens(contents):
            relative = normalize_reference_to_static_path(token)
            if relative is None:
                continue

            candidate = (static_dir / relative).resolve()
            if candidate.exists():
                referenced.add(candidate)
                continue

            # Only references that land under img/ are reported as missing.
            if relative.parts[:1] == ("img",):
                dangling.add(token)

    return referenced, dangling
166+
167+
168+
def main() -> None:
    """Command-line entry point: report images under static/ that nothing references.

    Options:
        --repo-root: repository root (defaults to the current directory).
        --static-dir: static directory, relative to --repo-root unless absolute.
        --show-missing-refs: also list references that point to missing files.
        --fail-on-unused: exit with status 1 when unused images are found.
        --source: extra path to scan, may be repeated.

    Raises:
        SystemExit: with a message when the static directory is not an
            existing directory, or with code 1 when ``--fail-on-unused`` is
            set and unused images were found.
    """
    parser = argparse.ArgumentParser(description="Find images under static/ that are not referenced.")
    parser.add_argument("--repo-root", default=os.getcwd(), help="Repository root (default: cwd)")
    parser.add_argument(
        "--static-dir",
        default="static",
        help="Path to static directory, relative to --repo-root if not absolute (default: static)",
    )
    parser.add_argument(
        "--show-missing-refs",
        action="store_true",
        help="Also print image references that point to missing files in static/",
    )
    parser.add_argument(
        "--fail-on-unused",
        action="store_true",
        help="Exit with code 1 when unused images are found.",
    )
    parser.add_argument(
        "--source",
        action="append",
        default=[],
        help=(
            "Additional source path to scan for references. "
            "Can be provided multiple times and is relative to --repo-root when not absolute."
        ),
    )
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    static_dir = Path(args.static_dir)
    if not static_dir.is_absolute():
        static_dir = (repo_root / static_dir).resolve()

    # A regular file would pass an exists() check and then silently yield no
    # images, so require an actual directory.
    if not static_dir.is_dir():
        raise SystemExit(f"static directory not found: {static_dir}")

    all_images = {path.resolve() for path in iter_static_images(static_dir)}
    sources = list(DEFAULT_REFERENCE_SOURCES)
    sources.extend(args.source)
    used_images, missing_refs = find_used_static_images(repo_root, static_dir, sources)
    unused_images = sorted(path for path in all_images if path not in used_images)

    print(f"Scanned {len(all_images)} image files under {static_dir}")
    print(f"Referenced images: {len(used_images)}")
    print(f"Unused images: {len(unused_images)}")
    print("")

    for path in unused_images:
        try:
            shown = path.relative_to(repo_root)
        except ValueError:
            # The image is not below repo_root (absolute --static-dir outside
            # the repository, or a symlink resolving elsewhere): print the
            # absolute path instead of crashing.
            shown = path
        print(shown.as_posix())

    if args.show_missing_refs and missing_refs:
        print("")
        print(f"Missing image references ({len(missing_refs)}):")
        for ref in sorted(missing_refs):
            print(ref)

    if args.fail_on_unused and unused_images:
        raise SystemExit(1)
227+
228+
229+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Binary file not shown.
Binary file not shown.
-202 KB
Binary file not shown.
-135 KB
Binary file not shown.
-170 KB
Binary file not shown.
-964 KB
Binary file not shown.
-146 KB
Binary file not shown.
-109 KB
Binary file not shown.
-84.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)