Skip to content

Commit 13d6381

Browse files
authored
agent: ingestion pin CPython source SHAs (#59)
Merged by Vision under delegated forge ownership. Gilfoyle implemented issue #51, Heimdall independently verified head 8ed5a36, and all CI/Security/CodeQL/CodeRabbit checks passed.
1 parent 3141462 commit 13d6381

4 files changed

Lines changed: 120 additions & 15 deletions

File tree

.planning/agent-context/cpython-source-sha-pin.md

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,22 @@
5757
## 5. Decision log
5858

5959
- Resolved SHAs (tag → 40-hex commit), one line each:
60-
- 3.10 / v3.10.20 →
61-
- 3.11 / v3.11.15 →
62-
- 3.12 / v3.12.13 →
63-
- 3.13 / v3.13.13 →
64-
- 3.14 / v3.14.4 →
65-
- Where/how the verification aborts on mismatch:
60+
- 3.10 / v3.10.20 → 842e987df856a5d4db37933c62a3456930a19092
61+
- 3.11 / v3.11.15 → 2340a037f7450e70fccfe411e6531afb4d57a312
62+
- 3.12 / v3.12.13 → 3bb231a6a5dc02b95658877318bf61501a7209e9
63+
- 3.13 / v3.13.13 → 01104ce1beb3135c2e0c01ec835b994c1f55a1c0
64+
- 3.14 / v3.14.4 → 23116f998f6789d8c2fbe5ed5b8146854c8c2a4f
65+
- Where/how the verification aborts on mismatch: after the shallow
66+
tag-based clone in `build-index`, `git -C <clone_dir> rev-parse HEAD` is
67+
compared to the authoritative config SHA. A mismatch logs the version, tag,
68+
actual SHA, and expected SHA, then raises `SystemExit(1)` before Sphinx setup
69+
or content ingestion can proceed.
6670
- **Draft SECURITY.md threat-model paragraph (for Vision to apply):**
67-
>
71+
> The largest build-time supply-chain input is the `build-index` clone of the
72+
> upstream CPython repository, which provides the source tree used to generate
73+
> canonical documentation content. Each supported CPython docs release is pinned
74+
> to the exact commit SHA that its human-readable release tag currently resolves
75+
> to; the tag is retained for operator readability, but the SHA is the
76+
> authoritative integrity anchor. If a tag is reissued, moved, or otherwise
77+
> resolves to different source content, the build fails before Sphinx setup or
78+
> content ingestion rather than silently publishing changed documentation.

src/mcp_server_python_docs/__main__.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def _consume_saved_stdout_fd() -> int:
6565
logger = logging.getLogger("mcp_server_python_docs")
6666

6767
# === Now safe to import everything else ===
68+
import subprocess # noqa: E402
69+
6870
import click # noqa: E402
6971

7072
from mcp_server_python_docs.ingestion.cpython_versions import ( # noqa: E402
@@ -110,6 +112,33 @@ def serve() -> None:
110112
pass # Client disconnected (HYGN-03)
111113

112114

115+
def _verify_cpython_source_sha(
    clone_dir: str,
    *,
    version: str,
    tag: str,
    expected_sha: str,
) -> None:
    """Abort the docs build if a CPython tag resolves to unexpected content.

    Runs ``git -C <clone_dir> rev-parse HEAD`` and compares the
    whitespace-stripped output with ``expected_sha`` using exact string
    equality.

    Args:
        clone_dir: Directory handed to ``git -C``.
        version: Version label; used only in log messages.
        tag: Tag name; used only in log messages.
        expected_sha: The SHA that ``HEAD`` must equal.

    Raises:
        SystemExit: With exit code 1 when the resolved SHA differs from
            ``expected_sha``.
        subprocess.CalledProcessError: When ``git rev-parse`` exits non-zero.
            The failure is logged (including git's stderr) and re-raised
            unchanged.
    """
    try:
        rev_parse = subprocess.run(
            ["git", "-C", clone_dir, "rev-parse", "HEAD"],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        # capture_output=True keeps git's stderr off the terminal, so without
        # this log the only visible diagnostic would be the exit status.
        logger.error(
            "CPython %s source integrity check could not run for tag %s: "
            "git rev-parse HEAD exited with status %s: %s",
            version,
            tag,
            exc.returncode,
            (exc.stderr or "").strip(),
        )
        raise

    actual_sha = rev_parse.stdout.strip()
    if actual_sha != expected_sha:
        logger.error(
            "CPython %s source integrity check failed: tag %s "
            "resolved to %s, expected %s. Aborting build.",
            version,
            tag,
            actual_sha,
            expected_sha,
        )
        raise SystemExit(1)
140+
141+
113142
@main.command("build-index")
114143
@click.option(
115144
"--versions",
@@ -124,7 +153,6 @@ def serve() -> None:
124153
def build_index(versions: str, skip_content: bool) -> None:
125154
"""Build the documentation index from objects.inv and Sphinx JSON."""
126155
import shutil
127-
import subprocess
128156
import tempfile
129157
import venv
130158
from pathlib import Path
@@ -224,6 +252,12 @@ def build_index(versions: str, skip_content: bool) -> None:
224252
capture_output=True,
225253
text=True,
226254
)
255+
_verify_cpython_source_sha(
256+
clone_dir,
257+
version=version,
258+
tag=config["tag"],
259+
expected_sha=config["sha"],
260+
)
227261

228262
# Create dedicated Sphinx venv (INGR-C-02)
229263
venv_dir = os.path.join(clone_dir, "_sphinx_venv")

src/mcp_server_python_docs/ingestion/cpython_versions.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ class CPythonDocsBuildConfig(TypedDict):
88
"""Build settings for one CPython documentation release."""
99

1010
tag: str
11+
sha: str
1112
sphinx_pin: str
1213

1314

@@ -21,12 +22,32 @@ class CPythonDocsBuildConfig(TypedDict):
2122

2223
SUPPORTED_DOC_VERSIONS_CSV: Final[str] = ",".join(SUPPORTED_DOC_VERSIONS)
2324

24-
# CPython git tags are pinned so content builds are reproducible and do not
25-
# drift when a maintenance branch receives new commits.
25+
# CPython git SHAs are authoritative for content build integrity. Tags are kept
26+
# for human-readable version mapping, but a moved tag must fail verification.
2627
CPYTHON_DOCS_BUILD_CONFIG: Final[dict[str, CPythonDocsBuildConfig]] = {
27-
"3.10": {"tag": "v3.10.20", "sphinx_pin": "sphinx==3.4.3"},
28-
"3.11": {"tag": "v3.11.15", "sphinx_pin": "sphinx~=7.2.0"},
29-
"3.12": {"tag": "v3.12.13", "sphinx_pin": "sphinx~=8.2.0"},
30-
"3.13": {"tag": "v3.13.13", "sphinx_pin": "sphinx<9.0.0"},
31-
"3.14": {"tag": "v3.14.4", "sphinx_pin": "sphinx<9.0.0"},
28+
"3.10": {
29+
"tag": "v3.10.20",
30+
"sha": "842e987df856a5d4db37933c62a3456930a19092",
31+
"sphinx_pin": "sphinx==3.4.3",
32+
},
33+
"3.11": {
34+
"tag": "v3.11.15",
35+
"sha": "2340a037f7450e70fccfe411e6531afb4d57a312",
36+
"sphinx_pin": "sphinx~=7.2.0",
37+
},
38+
"3.12": {
39+
"tag": "v3.12.13",
40+
"sha": "3bb231a6a5dc02b95658877318bf61501a7209e9",
41+
"sphinx_pin": "sphinx~=8.2.0",
42+
},
43+
"3.13": {
44+
"tag": "v3.13.13",
45+
"sha": "01104ce1beb3135c2e0c01ec835b994c1f55a1c0",
46+
"sphinx_pin": "sphinx<9.0.0",
47+
},
48+
"3.14": {
49+
"tag": "v3.14.4",
50+
"sha": "23116f998f6789d8c2fbe5ed5b8146854c8c2a4f",
51+
"sphinx_pin": "sphinx<9.0.0",
52+
},
3253
}

tests/test_ingestion.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99

1010
import io
1111
import os
12+
import re
1213
import runpy
1314
import shutil
15+
import subprocess
1416
import sys
1517
import types
1618

@@ -51,8 +53,45 @@ def test_supported_versions_have_pinned_docs_build_config(self):
5153
for version in SUPPORTED_DOC_VERSIONS:
5254
config = CPYTHON_DOCS_BUILD_CONFIG[version]
5355
assert config["tag"].startswith(f"v{version}.")
56+
assert re.fullmatch(r"[0-9a-f]{40}", config["sha"])
5457
assert config["sphinx_pin"].startswith("sphinx")
5558

59+
def test_cpython_source_sha_verification_aborts_on_mismatch(
    self,
    monkeypatch,
    caplog,
):
    """A HEAD SHA that differs from the expected SHA must exit with code 1."""
    from mcp_server_python_docs import __main__ as cli_main

    clone_path = "/tmp/cpython-3.14"
    pinned_sha = "a" * 40
    observed_sha = "b" * 40
    seen_commands: list[list[str]] = []

    def fake_run(
        cmd: list[str],
        *,
        check: bool,
        capture_output: bool,
        text: bool,
    ) -> subprocess.CompletedProcess[str]:
        # Record the command, then insist on the exact keyword arguments the
        # verification helper is expected to pass to subprocess.run.
        seen_commands.append(cmd)
        assert check is True
        assert capture_output is True
        assert text is True
        # Simulated git output: a SHA that does not match the pinned one.
        return subprocess.CompletedProcess(cmd, 0, stdout=observed_sha + "\n")

    monkeypatch.setattr(cli_main.subprocess, "run", fake_run)

    with pytest.raises(SystemExit) as exc_info:
        cli_main._verify_cpython_source_sha(
            clone_path,
            version="3.14",
            tag="v3.14.4",
            expected_sha=pinned_sha,
        )

    assert exc_info.value.code == 1
    assert seen_commands == [["git", "-C", clone_path, "rev-parse", "HEAD"]]
    assert "source integrity check failed" in caplog.text
94+
5695

5796
class TestJsonBuildRequirements:
5897
def test_omits_html_only_sphinx_extensions(self, tmp_path):

0 commit comments

Comments
 (0)