|
7 | 7 | - Key concepts are present |
8 | 8 | """ |
9 | 9 | import importlib.resources |
| 10 | +import re |
| 11 | +from pathlib import Path |
10 | 12 |
|
11 | 13 | import yaml |
12 | 14 |
|
@@ -76,3 +78,45 @@ def test_importlib_resources_path(self): |
76 | 78 | assert path.exists(), f"synonyms.yaml not found at {path}" |
77 | 79 | content = path.read_text() |
78 | 80 | assert len(content) > 0, "synonyms.yaml is empty" |
| 81 | + |
| 82 | + |
| 83 | +def test_yaml_loaded_only_via_safe_load(): |
| 84 | + """Lock in the packaged-YAML trust boundary for synonyms.yaml.""" |
| 85 | + repo_root = Path(__file__).resolve().parents[1] |
| 86 | + src_root = repo_root / "src" |
| 87 | + scan_roots = (src_root, repo_root / "tests") |
| 88 | + expected_yaml_input = ( |
| 89 | + "src/mcp_server_python_docs/data/synonyms.yaml" |
| 90 | + ) |
| 91 | + expected_safe_load_sites = { |
| 92 | + "src/mcp_server_python_docs/server.py", |
| 93 | + "src/mcp_server_python_docs/ingestion/sphinx_json.py", |
| 94 | + } |
| 95 | + |
| 96 | + unsafe_load_call = re.compile(r"\byaml[.]load\s*[(]") |
| 97 | + unsafe_loader_name = re.compile(r"\byaml[.]unsafe_load\b") |
| 98 | + safe_load_call = re.compile(r"\byaml[.]safe_load\s*[(]") |
| 99 | + |
| 100 | + violations: list[str] = [] |
| 101 | + safe_load_sites: set[str] = set() |
| 102 | + |
| 103 | + for scan_root in scan_roots: |
| 104 | + for source_path in sorted(scan_root.rglob("*.py")): |
| 105 | + relative_path = source_path.relative_to(repo_root).as_posix() |
| 106 | + for line_number, line in enumerate( |
| 107 | + source_path.read_text(encoding="utf-8").splitlines(), 1 |
| 108 | + ): |
| 109 | + if unsafe_load_call.search(line) or unsafe_loader_name.search(line): |
| 110 | + violations.append(f"{relative_path}:{line_number}: unsafe YAML load") |
| 111 | + if source_path.is_relative_to(src_root) and safe_load_call.search(line): |
| 112 | + safe_load_sites.add(relative_path) |
| 113 | + |
| 114 | + yaml_inputs = sorted( |
| 115 | + path.relative_to(repo_root).as_posix() |
| 116 | + for path in src_root.rglob("*") |
| 117 | + if path.suffix in {".yaml", ".yml"} |
| 118 | + ) |
| 119 | + |
| 120 | + assert violations == [] |
| 121 | + assert expected_safe_load_sites <= safe_load_sites |
| 122 | + assert yaml_inputs == [expected_yaml_input] |
0 commit comments