Skip to content

Commit ef7736e

Browse files
agent: tighten YAML audit regression test
1 parent fb1c398 commit ef7736e

2 files changed

Lines changed: 13 additions & 14 deletions

File tree

docs/architecture/YAML-TRUST-BOUNDARY.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,11 @@ The file is parsed only with `yaml.safe_load` in these call sites:
1010
- `src/mcp_server_python_docs/ingestion/sphinx_json.py` when ingestion populates
1111
the synonym table.
1212

13-
There are no `yaml.load`, `yaml.unsafe_load`, or custom non-`SafeLoader` parser
14-
call sites in `src/`. The regression test
13+
There are no `yaml.load` or `yaml.unsafe_load` parser call sites in `src/` or
14+
`tests/`. The regression test
1515
`tests/test_synonyms.py::test_yaml_loaded_only_via_safe_load` scans source files
16-
for unsafe YAML loaders, confirms both expected `safe_load` call sites, and
17-
asserts that `synonyms.yaml` is the only YAML file under
16+
and tests for unsafe YAML loaders, confirms both expected source `safe_load`
17+
call sites, and asserts that `synonyms.yaml` is the only YAML file under
1818
`src/mcp_server_python_docs/`.
1919

2020
Recommended future `SECURITY.md` wording for human review:

tests/test_synonyms.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ def test_yaml_loaded_only_via_safe_load():
8484
"""Lock in the packaged-YAML trust boundary for synonyms.yaml."""
8585
repo_root = Path(__file__).resolve().parents[1]
8686
src_root = repo_root / "src"
87+
scan_roots = (src_root, repo_root / "tests")
8788
expected_yaml_input = (
8889
"src/mcp_server_python_docs/data/synonyms.yaml"
8990
)
@@ -94,21 +95,19 @@ def test_yaml_loaded_only_via_safe_load():
9495

9596
unsafe_load_call = re.compile(r"\byaml[.]load\s*[(]")
9697
unsafe_loader_name = re.compile(r"\byaml[.]unsafe_load\b")
97-
loader_override = re.compile(r"\bLoader\s*=")
9898
safe_load_call = re.compile(r"\byaml[.]safe_load\s*[(]")
9999

100100
violations: list[str] = []
101101
safe_load_sites: set[str] = set()
102102

103-
for source_path in sorted(src_root.rglob("*.py")):
104-
relative_path = source_path.relative_to(repo_root).as_posix()
105-
for line_number, line in enumerate(source_path.read_text().splitlines(), 1):
106-
if unsafe_load_call.search(line) or unsafe_loader_name.search(line):
107-
violations.append(f"{relative_path}:{line_number}: unsafe YAML load")
108-
if loader_override.search(line) and "SafeLoader" not in line:
109-
violations.append(f"{relative_path}:{line_number}: custom YAML Loader")
110-
if safe_load_call.search(line):
111-
safe_load_sites.add(relative_path)
103+
for scan_root in scan_roots:
104+
for source_path in sorted(scan_root.rglob("*.py")):
105+
relative_path = source_path.relative_to(repo_root).as_posix()
106+
for line_number, line in enumerate(source_path.read_text().splitlines(), 1):
107+
if unsafe_load_call.search(line) or unsafe_loader_name.search(line):
108+
violations.append(f"{relative_path}:{line_number}: unsafe YAML load")
109+
if source_path.is_relative_to(src_root) and safe_load_call.search(line):
110+
safe_load_sites.add(relative_path)
112111

113112
yaml_inputs = sorted(
114113
path.relative_to(repo_root).as_posix()

0 commit comments

Comments
 (0)