Skip to content

Commit fb1c398

Browse files
agent: document PyYAML safe-loader boundary
1 parent 362fed7 commit fb1c398

2 files changed

Lines changed: 66 additions & 0 deletions

File tree

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# YAML Trust Boundary
2+
3+
`src/mcp_server_python_docs/data/synonyms.yaml` is the project's only packaged
4+
YAML data input. It is shipped inside the wheel and read through
5+
`importlib.resources`; users do not provide YAML at runtime.
6+
7+
The file is parsed only with `yaml.safe_load`, at exactly these call sites:
8+
9+
- `src/mcp_server_python_docs/server.py` when the MCP server starts.
10+
- `src/mcp_server_python_docs/ingestion/sphinx_json.py` when ingestion populates
11+
the synonym table.
12+
13+
There are no `yaml.load`, `yaml.unsafe_load`, or custom non-`SafeLoader` parser
14+
call sites in `src/`. The regression test
15+
`tests/test_synonyms.py::test_yaml_loaded_only_via_safe_load` scans source files
16+
for unsafe YAML loaders, confirms both expected `safe_load` call sites, and
17+
asserts that `synonyms.yaml` is the only YAML file under
18+
`src/mcp_server_python_docs/`.
19+
20+
Recommended future `SECURITY.md` wording for human review:
21+
22+
> The server parses only one packaged YAML input, `synonyms.yaml`, using
23+
> `yaml.safe_load`; user-supplied YAML is not accepted.

tests/test_synonyms.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
- Key concepts are present
88
"""
99
import importlib.resources
10+
import re
11+
from pathlib import Path
1012

1113
import yaml
1214

@@ -76,3 +78,44 @@ def test_importlib_resources_path(self):
7678
assert path.exists(), f"synonyms.yaml not found at {path}"
7779
content = path.read_text()
7880
assert len(content) > 0, "synonyms.yaml is empty"
81+
82+
83+
def test_yaml_loaded_only_via_safe_load():
    """Lock in the packaged-YAML trust boundary for synonyms.yaml.

    Scans every ``*.py`` file under ``src/`` line by line and fails when:

    * a line calls ``yaml.load`` / ``yaml.load_all``, or mentions
      ``yaml.unsafe_load`` / ``yaml.full_load`` (or their ``_all`` variants);
    * a line passes ``Loader=`` without naming a ``SafeLoader`` on that line;
    * one of the expected ``yaml.safe_load`` call sites is missing; or
    * any YAML file other than the packaged ``synonyms.yaml`` exists under
      ``src/``.

    The scan is purely textual and line-based: only ``yaml.<name>`` spellings
    are recognised, so aliased imports (``from yaml import load``) are not
    detected, and matches inside comments or strings count as violations.
    """
    # This file lives in <repo>/tests/, so the repository root is one level up.
    repo_root = Path(__file__).resolve().parents[1]
    src_root = repo_root / "src"
    expected_yaml_input = "src/mcp_server_python_docs/data/synonyms.yaml"
    expected_safe_load_sites = {
        "src/mcp_server_python_docs/server.py",
        "src/mcp_server_python_docs/ingestion/sphinx_json.py",
    }

    # ``yaml.load(`` and ``yaml.load_all(`` take an arbitrary Loader.
    unsafe_load_call = re.compile(r"\byaml[.]load(?:_all)?\s*[(]")
    # Any mention (call or bare reference) of the non-safe convenience loaders.
    # The optional ``_all`` is needed because ``\b`` does not match between
    # ``unsafe_load`` and ``_all``.
    unsafe_loader_name = re.compile(r"\byaml[.](?:unsafe|full)_load(?:_all)?\b")
    loader_override = re.compile(r"\bLoader\s*=")
    safe_load_call = re.compile(r"\byaml[.]safe_load\s*[(]")

    violations: list[str] = []
    safe_load_sites: set[str] = set()

    for source_path in sorted(src_root.rglob("*.py")):
        relative_path = source_path.relative_to(repo_root).as_posix()
        # Python source is UTF-8 by default; do not depend on the locale.
        source_lines = source_path.read_text(encoding="utf-8").splitlines()
        for line_number, line in enumerate(source_lines, 1):
            if unsafe_load_call.search(line) or unsafe_loader_name.search(line):
                violations.append(f"{relative_path}:{line_number}: unsafe YAML load")
            if loader_override.search(line) and "SafeLoader" not in line:
                violations.append(f"{relative_path}:{line_number}: custom YAML Loader")
            if safe_load_call.search(line):
                safe_load_sites.add(relative_path)

    yaml_inputs = sorted(
        path.relative_to(repo_root).as_posix()
        for path in src_root.rglob("*")
        if path.suffix in {".yaml", ".yml"}
    )

    assert violations == [], "unsafe YAML loading found:\n" + "\n".join(violations)
    # Subset check: additional safe_load call sites are tolerated.
    missing_sites = expected_safe_load_sites - safe_load_sites
    assert not missing_sites, (
        f"expected yaml.safe_load call sites not found: {sorted(missing_sites)}"
    )
    assert yaml_inputs == [expected_yaml_input], (
        f"unexpected YAML inputs under src/: {yaml_inputs}"
    )

0 commit comments

Comments
 (0)