Skip to content

Commit 2323166

Browse files
Disallow crawling 3.9 docs in robots.txt (#332)
Automatically generate it from EOL versions per https://peps.python.org/api/release-cycle.json
1 parent c672bf5 commit 2323166

File tree

3 files changed

+38
-18
lines changed

3 files changed

+38
-18
lines changed

build_docs.py

Lines changed: 10 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -1251,7 +1251,8 @@ def build_docs(args: argparse.Namespace) -> int:
12511251

12521252
build_sitemap(versions, languages, args.www_root, args.group)
12531253
build_404(args.www_root, args.group)
1254-
copy_robots_txt(
1254+
build_robots_txt(
1255+
versions,
12551256
args.www_root,
12561257
args.group,
12571258
args.skip_cache_invalidation,
@@ -1338,20 +1339,23 @@ def build_404(www_root: Path, group: str) -> None:
13381339
chgrp(not_found_file, group=group)
13391340

13401341

1341-
def copy_robots_txt(
1342+
def build_robots_txt(
1343+
versions: Versions,
13421344
www_root: Path,
13431345
group: str,
13441346
skip_cache_invalidation: bool,
13451347
http: urllib3.PoolManager,
13461348
) -> None:
1347-
"""Copy robots.txt to www_root."""
1349+
"""Build robots.txt to www_root."""
13481350
if not www_root.exists():
1349-
logging.info("Skipping copying robots.txt (www root does not even exist).")
1351+
logging.info("Skipping robots.txt generation (www root does not even exist).")
13501352
return
1351-
logging.info("Copying robots.txt...")
1353+
logging.info("Starting robots.txt generation...")
13521354
template_path = HERE / "templates" / "robots.txt"
1355+
template = jinja2.Template(template_path.read_text(encoding="UTF-8"))
1356+
rendered_template = template.render(versions=versions)
13531357
robots_path = www_root / "robots.txt"
1354-
shutil.copyfile(template_path, robots_path)
1358+
robots_path.write_text(rendered_template, encoding="UTF-8")
13551359
robots_path.chmod(0o775)
13561360
chgrp(robots_path, group=group)
13571361
if not skip_cache_invalidation:

templates/robots.txt

Lines changed: 5 additions & 11 deletions
Original file line number · Diff line number · Diff line change
@@ -13,14 +13,8 @@ Disallow: /2.2/
1313
Disallow: /2.3/
1414
Disallow: /2.4/
1515
Disallow: /2.5/
16-
Disallow: /2.6/
17-
Disallow: /2.7/
18-
Disallow: /3.0/
19-
Disallow: /3.1/
20-
Disallow: /3.2/
21-
Disallow: /3.3/
22-
Disallow: /3.4/
23-
Disallow: /3.5/
24-
Disallow: /3.6/
25-
Disallow: /3.7/
26-
Disallow: /3.8/
16+
{% for version in versions -%}
17+
{% if version.status == "EOL" -%}
18+
Disallow: /{{ version.name }}/
19+
{% endif -%}
20+
{% endfor %}

tests/test_build_docs.py

Lines changed: 23 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,8 @@
1+
from unittest.mock import patch
2+
13
import pytest
24

3-
from build_docs import format_seconds
5+
from build_docs import Version, Versions, build_robots_txt, format_seconds
46

57

68
@pytest.mark.parametrize(
@@ -24,3 +26,23 @@
2426
)
2527
def test_format_seconds(seconds: float, expected: str) -> None:
2628
assert format_seconds(seconds) == expected
29+
30+
31+
@patch("build_docs.chgrp")
32+
def test_build_robots_txt(mock_chgrp, tmp_path) -> None:
33+
versions = Versions([
34+
Version(name="3.14", status="EOL", branch_or_tag="3.14"),
35+
Version(name="3.15", status="EOL", branch_or_tag="3.15"),
36+
Version(name="3.16", status="security-fixes", branch_or_tag="3.16"),
37+
Version(name="3.17", status="stable", branch_or_tag="2.17"),
38+
])
39+
40+
build_robots_txt(
41+
versions, tmp_path, group="", skip_cache_invalidation=True, http=None
42+
)
43+
44+
result = (tmp_path / "robots.txt").read_text()
45+
assert "Disallow: /3.14/" in result
46+
assert "Disallow: /3.15/" in result
47+
assert "/3.16/" not in result
48+
assert "/3.17/" not in result

0 commit comments

Comments (0)