Skip to content

Commit b3d4b94

Browse files
committed
Vendor example app includes as local files
Fixes DOC-17061 Summary of changes: - Vendored example app source snippets into local include files. - Replaced example app remote includes with local includes. - Added scripts to vendor and validate example app includes.
1 parent e2dc6d0 commit b3d4b94

219 files changed

Lines changed: 4186 additions & 603 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env python3
2+
"""Validate vendored example app includes without making network requests."""
3+
4+
from __future__ import annotations
5+
6+
import runpy
7+
from pathlib import Path
8+
9+
10+
# Absolute path of the directory that contains this wrapper script.
SCRIPT_DIR = Path(__file__).resolve().parent
# The verification script that main() executes; it is looked up next to this file.
VERIFY_SCRIPT = SCRIPT_DIR / "verify_example_app_includes.py"
12+
13+
14+
def main() -> int:
    """Run the sibling verification script and return its result.

    The file at ``VERIFY_SCRIPT`` is executed with ``runpy.run_path`` and the
    ``main`` callable found in the resulting module namespace is then invoked.

    Returns:
        Whatever the verification script's ``main()`` returns.
    """
    verify_globals = runpy.run_path(str(VERIFY_SCRIPT))
    verify_main = verify_globals["main"]
    return verify_main()
17+
18+
19+
# When run as a script, exit with the value returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
#!/usr/bin/env python3
2+
"""Fetch example app source referenced by docs pages into local include files."""
3+
4+
from __future__ import annotations
5+
6+
import hashlib
7+
import re
8+
import sys
9+
import time
10+
import urllib.error
11+
import urllib.parse
12+
import urllib.request
13+
from concurrent.futures import ThreadPoolExecutor, as_completed
14+
from dataclasses import dataclass
15+
from pathlib import Path
16+
17+
18+
# Repository root: two directory levels above the directory holding this script.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Root of the docs source tree that is scanned for pages and holds _includes.
SRC_CURRENT = REPO_ROOT / "src" / "current"
# Directory under _includes where vendored example app files are written.
VENDORED_ROOT = SRC_CURRENT / "_includes" / "example-apps"
# Upper bound on the number of threads used to download remote files.
FETCH_WORKERS = 16
22+
23+
# Matches a `{% remote_include ... %}` tag whose target is a
# raw.githubusercontent.com URL. Named groups:
#   url   - the complete URL
#   owner, repo, ref, path - the URL's path components, in that order
#   args  - whatever sits between the URL and the closing `%}`: either
#           whitespace followed by a `||`-prefixed argument string, or
#           optional whitespace only
REMOTE_INCLUDE_PATTERN = re.compile(
    r"{%\s*remote_include\s+"
    r"(?P<url>https://raw\.githubusercontent\.com/"
    r"(?P<owner>[^/\s%]+)/(?P<repo>[^/\s%]+)/(?P<ref>[^/\s%]+)/(?P<path>[^\s%]+))"
    r"(?P<args>\s+\|\|.*?|\s*)%}"
)
# Matches an HTML comment; DOTALL lets a single comment span several lines.
HTML_COMMENT_PATTERN = re.compile(r"<!--.*?-->", re.DOTALL)
30+
31+
32+
@dataclass(frozen=True)
class IncludeSpec:
    """Immutable description of one remote include reference.

    Instances are hashable (frozen dataclass), so equal references collapse
    when collected in a set.
    """

    url: str  # complete remote URL of the source file
    owner: str  # first URL path component
    repo: str  # second URL path component
    ref: str  # third URL path component
    path: str  # remainder of the URL path
    start_marker: str | None = None  # text that opens the wanted slice, if any
    end_marker: str | None = None  # text that closes the wanted slice, if any

    @property
    def is_slice(self) -> bool:
        """Return True only when both a start and an end marker are set."""
        return not (self.start_marker is None or self.end_marker is None)
45+
46+
47+
def is_example_app_remote(owner: str, repo: str) -> bool:
    """Decide whether a GitHub owner/repo pair counts as an example app source.

    Every repository owned by ``cockroachlabs`` qualifies; for ``cockroachdb``
    only repositories whose name starts with ``example-app-`` qualify.
    """
    if owner == "cockroachlabs":
        return True
    return owner == "cockroachdb" and repo.startswith("example-app-")
51+
52+
53+
def parse_markers(args: str) -> tuple[str | None, str | None]:
    """Split a ``remote_include`` argument string into its two markers.

    Args:
        args: The text following the URL inside the tag. Either blank or of
            the form ``|| <start marker> || <end marker>``.

    Returns:
        ``(None, None)`` when *args* is blank, otherwise the whitespace-trimmed
        ``(start_marker, end_marker)`` pair.

    Raises:
        ValueError: If *args* is non-blank but does not begin with ``||``,
            lacks a second ``||`` separator, or yields an empty marker.
    """
    stripped = args.strip()
    if stripped == "":
        return None, None
    if stripped[:2] != "||":
        raise ValueError(f"unexpected remote_include args: {stripped}")

    # Only the first separator after the leading "||" splits the markers; any
    # further "||" stays inside the end marker.
    pieces = stripped[2:].split("||", 1)
    if len(pieces) != 2:
        raise ValueError(f"expected start and end markers in: {stripped}")

    start_marker, end_marker = (piece.strip() for piece in pieces)
    if not (start_marker and end_marker):
        raise ValueError(f"empty start or end marker in: {stripped}")
    return start_marker, end_marker
67+
68+
69+
def fetch(url: str) -> str:
    """Download *url* and return its body decoded as text.

    Transport failures are retried: up to three attempts are made, sleeping
    1s and then 2s between them. Decoding failures are not retried because
    the same bytes would fail the same way again.

    Args:
        url: The URL to download.

    Returns:
        The response body decoded with the charset announced in the response
        headers, or UTF-8 when none is announced.

    Raises:
        RuntimeError: If every attempt fails or the body cannot be decoded.
            The underlying exception is attached as ``__cause__``.
    """
    max_attempts = 3
    last_error: Exception | None = None
    for attempt in range(max_attempts):
        try:
            with urllib.request.urlopen(url, timeout=30) as response:
                charset = response.headers.get_content_charset() or "utf-8"
                body = response.read()
        except OSError as e:
            # OSError covers urllib.error.URLError (and HTTPError), TimeoutError
            # and connection errors raised while the body is being read.
            last_error = e
            if attempt < max_attempts - 1:
                time.sleep(2**attempt)
            continue

        try:
            return body.decode(charset)
        except UnicodeDecodeError as e:
            # Deterministic failure: fail immediately instead of re-downloading.
            raise RuntimeError(f"failed to fetch {url}: {e}") from e

    raise RuntimeError(f"failed to fetch {url}: {last_error}") from last_error
81+
82+
83+
def extract_between_markers(content: str, spec: IncludeSpec) -> str:
    """Return the part of *content* enclosed by the markers in *spec*.

    When either marker is ``None`` the whole *content* is returned. Otherwise
    the result starts on the line after the one containing the start marker
    and stops at the beginning of the line containing the end marker. If the
    two markers share a line, the text strictly between them is returned.

    Args:
        content: Full text of the remote file.
        spec: Object providing ``start_marker``, ``end_marker`` and ``url``.

    Raises:
        RuntimeError: If the start marker is absent, or the end marker does
            not occur after the start marker.
    """
    opening, closing = spec.start_marker, spec.end_marker
    if opening is None or closing is None:
        return content

    opening_at = content.find(opening)
    if opening_at < 0:
        raise RuntimeError(f"start marker not found in {spec.url}: {spec.start_marker}")
    body_from = opening_at + len(opening)

    closing_at = content.find(closing, body_from)
    if closing_at < 0:
        raise RuntimeError(f"end marker not found in {spec.url}: {spec.end_marker}")

    # Skip the remainder of the start-marker line, unless the first newline
    # lies beyond the end marker (both markers on the same line).
    newline_at = content.find("\n", body_from)
    first = newline_at + 1 if 0 <= newline_at <= closing_at else body_from

    # Cut just after the last newline before the end marker so that any
    # comment prefix on the end-marker line is excluded.
    last_newline = content.rfind("\n", first, closing_at)
    stop = closing_at if last_newline == -1 else last_newline + 1

    return content[first:stop]
109+
110+
111+
def normalize_include_content(content: str) -> str:
    """Normalize line endings and trailing whitespace of include text.

    CRLF and lone CR become LF, trailing whitespace is removed from every
    line, trailing blank lines are dropped, and the result always ends with
    exactly one newline (so empty input yields ``"\\n"``).
    """
    unified = content.replace("\r\n", "\n").replace("\r", "\n")
    trimmed_lines = map(str.rstrip, unified.splitlines())
    body = "\n".join(trimmed_lines)
    return body.rstrip("\n") + "\n"
115+
116+
117+
def marker_slug(marker: str) -> str:
    """Turn a start marker into a short token usable in a file name.

    A leading comment opener (``--``, ``#``, ``//``, ``/*``, ``*``), a trailing
    ``*/`` and a leading ``START``/``BEGIN`` keyword are removed. The rest is
    lower-cased and every run of characters outside ``[a-z0-9]`` becomes a
    single ``-``.

    Args:
        marker: The raw start marker text.

    Returns:
        The slug, or the first 8 hex digits of the SHA-256 of the stripped
        original marker when the slug would be empty. Hashing the original
        text keeps distinct symbol-only markers from sharing one fallback.
    """
    original = marker.strip()
    slug = re.sub(r"^(--|#|//|/\*+|\*)\s*", "", original)
    slug = re.sub(r"\s*\*/$", "", slug)
    slug = re.sub(r"^(START|BEGIN)\s+", "", slug, flags=re.IGNORECASE)
    slug = slug.strip().lower()
    slug = re.sub(r"[^a-z0-9]+", "-", slug).strip("-")
    if slug:
        return slug
    # The slug is empty here, so hash the marker itself rather than the slug.
    return hashlib.sha256(original.encode()).hexdigest()[:8]
125+
126+
127+
def local_include_path(spec: IncludeSpec) -> Path:
    """Compute the include path, relative to ``_includes``, for *spec*.

    The result is ``example-apps/<owner>/<repo>/<ref>/<percent-decoded path>``.
    For a slice, ``__<slug of the start marker>`` is inserted between the file
    name's stem and its (possibly multi-part) suffix.
    """
    source_path = Path(urllib.parse.unquote(spec.path))

    if spec.is_slice:
        assert spec.start_marker is not None
        filename = source_path.name
        suffixes = "".join(source_path.suffixes)
        # With no suffix the whole file name acts as the stem.
        stem = filename[: -len(suffixes)] if suffixes else filename
        slug = marker_slug(spec.start_marker)
        source_path = source_path.with_name(f"{stem}__{slug}{suffixes}")

    return Path("example-apps", spec.owner, spec.repo, spec.ref) / source_path
141+
142+
143+
def include_text(spec: IncludeSpec) -> str:
    """Return the ``{% include ... %}`` tag pointing at the local file for *spec*."""
    include_path = local_include_path(spec).as_posix()
    return f"{{% include {include_path} %}}"
145+
146+
147+
def comment_spans(text: str) -> list[tuple[int, int]]:
    """Return the ``(start, end)`` offsets of every HTML comment in *text*."""
    spans: list[tuple[int, int]] = []
    for comment in HTML_COMMENT_PATTERN.finditer(text):
        spans.append(comment.span())
    return spans
149+
150+
151+
def in_spans(offset: int, spans: list[tuple[int, int]]) -> bool:
    """Return True when *offset* lies inside any half-open ``[start, end)`` span."""
    for begin, finish in spans:
        if begin <= offset < finish:
            return True
    return False
153+
154+
155+
def spec_from_match(match: re.Match[str]) -> IncludeSpec | None:
    """Build an ``IncludeSpec`` from a ``REMOTE_INCLUDE_PATTERN`` match.

    Returns:
        ``None`` when the matched owner/repo is not an example app source,
        otherwise the spec described by the match's named groups.

    Raises:
        ValueError: Propagated from ``parse_markers`` for malformed tag
            arguments.
    """
    fields = match.groupdict()
    if not is_example_app_remote(fields["owner"], fields["repo"]):
        return None

    start_marker, end_marker = parse_markers(fields["args"])
    return IncludeSpec(
        url=fields["url"],
        owner=fields["owner"],
        repo=fields["repo"],
        ref=fields["ref"],
        path=fields["path"],
        start_marker=start_marker,
        end_marker=end_marker,
    )
171+
172+
173+
def rewrite_page(text: str) -> tuple[str, set[IncludeSpec]]:
    """Replace example app ``remote_include`` tags in *text* with local includes.

    Tags that start inside an HTML comment, and tags that do not point at an
    example app source, are left untouched.

    Returns:
        ``(new_text, specs)`` where *specs* holds one ``IncludeSpec`` per
        distinct replaced reference. When nothing is replaced, the original
        *text* and an empty set are returned.
    """
    comments = comment_spans(text)
    found: set[IncludeSpec] = set()
    pieces: list[str] = []
    cursor = 0

    for tag in REMOTE_INCLUDE_PATTERN.finditer(text):
        spec = None if in_spans(tag.start(), comments) else spec_from_match(tag)
        if spec is None:
            continue
        found.add(spec)
        # Copy the untouched text before the tag, then the replacement tag.
        pieces += [text[cursor : tag.start()], include_text(spec)]
        cursor = tag.end()

    if found:
        pieces.append(text[cursor:])
        return "".join(pieces), found
    return text, set()
195+
196+
197+
def write_include(spec: IncludeSpec, remote_content: str) -> Path:
    """Write the normalized include for *spec* under ``_includes``.

    Args:
        spec: The include reference being vendored.
        remote_content: Full text of the remote file named by ``spec.url``.

    Returns:
        The path of the file that was written.

    Raises:
        RuntimeError: If the extracted content contains nothing but
            whitespace, or a marker cannot be found (propagated from
            ``extract_between_markers``).
    """
    content = normalize_include_content(extract_between_markers(remote_content, spec))
    # normalize_include_content always appends a newline, so a plain emptiness
    # check can never fire; test for whitespace-only content instead.
    if not content.strip():
        raise RuntimeError(f"empty include content for {spec}")

    dest = SRC_CURRENT / "_includes" / local_include_path(spec)
    dest.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the locale default.
    dest.write_text(content, encoding="utf-8")
    return dest
206+
207+
208+
def main() -> int:
    """Vendor every example app remote include referenced by the docs pages.

    Steps, in order:
      1. Scan ``src/current/v*/*.md`` and compute the rewritten text of each
         page that has example app ``remote_include`` tags.
      2. Download each distinct remote URL using a thread pool.
      3. Write one local include file per distinct reference, failing if two
         different references map to the same local path.
      4. Write the rewritten pages.
      5. Delete files under ``VENDORED_ROOT`` that were not written in step 3.
      6. Re-scan to confirm the files exist and no example app
         ``remote_include`` tags remain.

    Pages are read and written as UTF-8 regardless of the locale default.

    Returns:
        0 on success (including when there is nothing to do), 1 when expected
        files are missing or remote includes remain.

    Raises:
        RuntimeError: On a download failure, a missing marker, empty include
            content, or a local path collision.
    """
    page_rewrites: list[tuple[Path, str]] = []
    specs: set[IncludeSpec] = set()

    for page in sorted(SRC_CURRENT.glob("v*/*.md")):
        rewritten, page_specs = rewrite_page(page.read_text(encoding="utf-8"))
        if page_specs:
            specs.update(page_specs)
            page_rewrites.append((page, rewritten))

    if not specs:
        print("No example app remote_include tags found.")
        return 0

    # Several specs (different slices) can share one URL; fetch each URL once.
    urls = sorted({spec.url for spec in specs})
    remote_content: dict[str, str] = {}
    with ThreadPoolExecutor(max_workers=min(FETCH_WORKERS, len(urls))) as executor:
        futures = {executor.submit(fetch, url): url for url in urls}
        for future in as_completed(futures):
            # result() re-raises any fetch failure before anything is written.
            remote_content[futures[future]] = future.result()

    expected: set[Path] = set()
    include_owners: dict[Path, IncludeSpec] = {}
    for spec in sorted(specs, key=lambda s: local_include_path(s).as_posix()):
        rel = local_include_path(spec)
        previous = include_owners.get(rel)
        if previous is not None and previous != spec:
            raise RuntimeError(f"local include path collision: {rel}")
        include_owners[rel] = spec

        expected.add(write_include(spec, remote_content[spec.url]))

    for page, rewritten in page_rewrites:
        page.write_text(rewritten, encoding="utf-8")

    # Remove vendored files that no page references any more.
    if VENDORED_ROOT.exists():
        for existing in VENDORED_ROOT.rglob("*"):
            if existing.is_file() and existing not in expected:
                existing.unlink()

    missing = [path for path in sorted(expected) if not path.exists()]

    # NOTE(review): rewrite_page skips tags inside HTML comments, but this
    # re-scan does not, so a commented-out example app tag is reported as
    # remaining and makes the run exit with 1 - confirm that is intended.
    remaining: list[str] = [
        str(page.relative_to(REPO_ROOT))
        for page in sorted(SRC_CURRENT.glob("v*/*.md"))
        if any(
            spec_from_match(match) is not None
            for match in REMOTE_INCLUDE_PATTERN.finditer(page.read_text(encoding="utf-8"))
        )
    ]

    print(f"Fetched {len(urls)} example app source files.")
    print(f"Wrote {len(expected)} example app include files.")
    print(f"Updated {len(page_rewrites)} docs pages.")

    if missing:
        print("Missing vendored example app files:", file=sys.stderr)
        for path in missing:
            print(f"  {path.relative_to(REPO_ROOT)}", file=sys.stderr)
        return 1

    if remaining:
        print("Remaining example app remote includes:", file=sys.stderr)
        for path in remaining:
            print(f"  {path}", file=sys.stderr)
        return 1

    return 0
278+
279+
280+
# When run as a script, exit with the value returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#!/usr/bin/env python3
2+
"""Verify example app source is served from local include files."""
3+
4+
from __future__ import annotations
5+
6+
import re
7+
import sys
8+
from pathlib import Path
9+
10+
11+
# Repository root: two directory levels above the directory holding this script.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Root of the docs source tree whose pages are scanned.
SRC_CURRENT = REPO_ROOT / "src" / "current"
# Directory under _includes whose files are compared against page references.
VENDORED_ROOT = SRC_CURRENT / "_includes" / "example-apps"
14+
15+
# Matches a `{% remote_include ... %}` tag whose target is a
# raw.githubusercontent.com URL. Named groups: url (the complete URL), owner
# and repo (its first two path components). Anything between the URL and the
# closing `%}` is matched but not captured.
REMOTE_INCLUDE_PATTERN = re.compile(
    r"{%\s*remote_include\s+"
    r"(?P<url>https://raw\.githubusercontent\.com/"
    r"(?P<owner>[^/\s%]+)/(?P<repo>[^/\s%]+)/[^\s%]+)"
    r"(?:\s+\|\|.*?|\s*)%}"
)
# Matches a `{% include example-apps/... %}` tag; the `path` group is the
# include path, starting with `example-apps/`.
LOCAL_INCLUDE_PATTERN = re.compile(
    r"{%\s*include\s+"
    r"(?P<path>example-apps/[^\s%]+)\s*%}"
)
25+
26+
27+
def is_example_app_remote(owner: str, repo: str) -> bool:
    """Decide whether a GitHub owner/repo pair counts as an example app source.

    Every repository owned by ``cockroachlabs`` qualifies; for ``cockroachdb``
    only repositories whose name starts with ``example-app-`` qualify.
    """
    if owner == "cockroachlabs":
        return True
    return owner == "cockroachdb" and repo.startswith("example-app-")
31+
32+
33+
def rel(path: Path) -> str:
    """Return *path* as a string relative to ``REPO_ROOT``.

    Raises:
        ValueError: If *path* is not located under ``REPO_ROOT`` (raised by
            ``Path.relative_to``).
    """
    relative = path.relative_to(REPO_ROOT)
    return str(relative)
35+
36+
37+
def _report(heading: str, entries: list[str], limit: int = 50) -> None:
    """Print *heading* and at most *limit* entries to stderr, noting any overflow."""
    if not entries:
        return
    print(heading, file=sys.stderr)
    for entry in entries[:limit]:
        print(f"  {entry}", file=sys.stderr)
    overflow = len(entries) - limit
    if overflow > 0:
        print(f"  ... and {overflow} more", file=sys.stderr)


def main() -> int:
    """Check that docs pages use only local example app includes.

    Every ``src/current/v*/*.md`` page is scanned line by line. The check
    fails when any of the following is found:
      * an example app ``remote_include`` tag,
      * a local ``example-apps/`` include whose file does not exist,
      * a file under ``VENDORED_ROOT`` that no page includes,
      * a zero-byte file under ``VENDORED_ROOT``.

    Pages are read as UTF-8 regardless of the locale default.

    Returns:
        0 when all checks pass, 1 otherwise. Problems are listed on stderr,
        capped at 50 entries per category.
    """
    remote_refs: list[str] = []
    include_sites = 0
    expected_files: set[Path] = set()

    for page in sorted(SRC_CURRENT.glob("v*/*.md")):
        text = page.read_text(encoding="utf-8")
        # NOTE(review): matching is per line, so a tag split across several
        # lines is not detected here - confirm that is acceptable.
        for lineno, line in enumerate(text.splitlines(), start=1):
            for match in REMOTE_INCLUDE_PATTERN.finditer(line):
                if is_example_app_remote(match.group("owner"), match.group("repo")):
                    remote_refs.append(f"{rel(page)}:{lineno}: {line.strip()}")

            for match in LOCAL_INCLUDE_PATTERN.finditer(line):
                include_sites += 1
                expected_files.add(SRC_CURRENT / "_includes" / match.group("path"))

    existing_files: set[Path] = set()
    if VENDORED_ROOT.exists():
        existing_files = {path for path in VENDORED_ROOT.rglob("*") if path.is_file()}

    missing_files = sorted(expected_files - existing_files)
    stale_files = sorted(existing_files - expected_files)
    empty_files = sorted(path for path in existing_files if path.stat().st_size == 0)

    _report("Found example app remote includes:", remote_refs)
    _report("Missing example app include files:", [rel(path) for path in missing_files])
    _report("Unreferenced example app include files:", [rel(path) for path in stale_files])
    _report("Empty example app include files:", [rel(path) for path in empty_files])

    if remote_refs or missing_files or stale_files or empty_files:
        return 1

    print(f"Verified {include_sites} example app include sites.")
    print(f"Verified {len(existing_files)} example app include files.")
    return 0
93+
94+
95+
# When run as a script, exit with the value returned by main().
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)