Skip to content

Commit 0fafcf4

Browse files
committed
Add backend coverage for authority-pack config, source-host union, and registry fault isolation
Covers the patch lines that codecov/patch/Backend flagged on this PR:

- test_authority_pack_config.py (new): the fail-fast validators (iter_shape_rules / iter_abbreviations) on every malformed shape, the never-raise _load_yaml / _iter_pack_mapping_files skip paths, and the runtime-scan fault isolation that downgrades a bad pack to log-and-skip.
- test_authority_source_hosts.py: manifest-skip branches in pack_declared_source_hosts (no pack.yaml, malformed YAML, non-list source_hosts).
- test_authority_pack.py: Command._validate_source_hosts rejects a non-list source_hosts declaration.
- test_authority_pack_providers.py: a broken in-pack provider module is logged + skipped without breaking sibling discovery; a duplicate supported-prefix install is warned; authority_pack_dirs skips a non-directory AUTHORITY_PACK_PATHS entry.
- test_safe_http.py: _resolve_allowlist fails closed to the baseline when no dynamic provider is registered.

Tests only; no production code change.
1 parent 060e5d1 commit 0fafcf4

5 files changed

Lines changed: 406 additions & 1 deletion

File tree

opencontractserver/tests/test_authority_pack.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222

2323
from opencontractserver import enrichment
2424
from opencontractserver.annotations.models import AuthorityNamespace
25+
from opencontractserver.corpuses.management.commands.load_authority_pack import (
26+
Command as LoadAuthorityPackCommand,
27+
)
2528
from opencontractserver.corpuses.models import Corpus
2629
from opencontractserver.corpuses.services.corpus_documents import (
2730
CorpusDocumentService,
@@ -42,6 +45,32 @@ def _load_yaml(path: Path) -> dict:
4245
return yaml.safe_load(path.read_text(encoding="utf-8"))
4346

4447

48+
class ValidateSourceHostsTests(SimpleTestCase):
    """Pin the fail-fast contract of ``Command._validate_source_hosts``.

    A manifest may omit ``source_hosts`` or set it to null; a non-list value
    or a list entry that is not a bare hostname is expected to raise
    ``CommandError``.
    """

    def test_missing_source_hosts_is_ok(self):
        # Absent / null source_hosts is valid (a non-scraping pack).
        for manifest in ({}, {"source_hosts": None}):
            LoadAuthorityPackCommand._validate_source_hosts(manifest)

    def test_source_hosts_not_a_list_raises(self):
        # A single string where a list is required must be rejected.
        manifest = {"source_hosts": "tcpbolivia.bo"}
        expected = "must be a list of hostnames"
        with self.assertRaisesMessage(CommandError, expected):
            LoadAuthorityPackCommand._validate_source_hosts(manifest)

    def test_source_hosts_entry_not_a_bare_host_raises(self):
        # A full URL is not a bare hostname, so the entry must be rejected.
        manifest = {"source_hosts": ["https://tcpbolivia.bo"]}
        expected = "is not a bare hostname"
        with self.assertRaisesMessage(CommandError, expected):
            LoadAuthorityPackCommand._validate_source_hosts(manifest)

    def test_valid_source_hosts_pass(self):
        # A list of bare hostnames must validate without raising.
        hosts = ["tcpbolivia.bo", "gacetaoficialdebolivia.gob.bo"]
        LoadAuthorityPackCommand._validate_source_hosts({"source_hosts": hosts})
72+
73+
4574
class BoliviaPackContentTests(SimpleTestCase):
4675
"""Static integrity of the shipped pack — runs with no DB."""
4776

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
"""Validation + fault-isolation of pack-declared citation vocabulary.
2+
3+
``enrichment/services/authority_pack_config.py`` reads each installed pack's
4+
authority-mappings YAML and merges its ``shape_rules`` / ``abbreviations`` onto
5+
the Python baseline. The happy path (a well-formed pack actually changing
6+
classification / extraction) is covered by ``test_authority_pack_taxonomy``; this
7+
module pins the *defensive* contract:
8+
9+
* the fail-fast validators (``iter_shape_rules`` / ``iter_abbreviations``) raise
10+
``ValueError`` on every malformed shape so ``load_authority_pack`` aborts a bad
11+
install loudly, and
12+
* the runtime scan (``pack_declared_shape_rules`` / ``pack_declared_abbreviations``
13+
/ ``_iter_pack_mapping_files`` / ``_load_yaml``) downgrades those raises to
14+
log-and-skip so a single broken pack can never break extraction for every
15+
jurisdiction.
16+
17+
No database required — the config is read from pack files on disk.
18+
"""
19+
20+
from __future__ import annotations
21+
22+
import tempfile
23+
from pathlib import Path
24+
from unittest import mock
25+
26+
from django.test import SimpleTestCase
27+
28+
from opencontractserver.enrichment.services import authority_pack_config as apc
29+
30+
# Logger name handed to ``assertLogs`` in the tests below; it is the same
# dotted path that ``apc`` is imported from.
_MODULE = "opencontractserver.enrichment.services.authority_pack_config"
31+
32+
# A reliably-unparseable YAML document (unterminated flow sequence).
33+
_BAD_YAML = "name: oops\nsource: [unterminated"
34+
35+
36+
class IterShapeRulesValidationTests(SimpleTestCase):
    """Malformed ``shape_rules`` declarations make ``iter_shape_rules`` raise.

    Each test feeds one malformed shape and expects ``ValueError`` with a
    recognisable message; the remaining tests pin the empty and valid cases.
    """

    def test_none_returns_empty(self):
        # A missing key and an explicit null both yield an empty list.
        for config in ({}, {"shape_rules": None}):
            self.assertEqual(apc.iter_shape_rules(config), [])

    def test_not_a_list_raises(self):
        config = {"shape_rules": {"pattern": "^x$"}}
        with self.assertRaisesMessage(ValueError, "must be a list"):
            apc.iter_shape_rules(config)

    def test_entry_without_pattern_raises(self):
        config = {"shape_rules": [{"jurisdiction": "bo"}]}
        with self.assertRaisesMessage(ValueError, "needs a 'pattern'"):
            apc.iter_shape_rules(config)

    def test_uncompilable_pattern_raises(self):
        config = {"shape_rules": [{"pattern": "([unclosed"}]}
        with self.assertRaisesMessage(ValueError, "bad regex"):
            apc.iter_shape_rules(config)

    def test_unknown_authority_type_raises(self):
        rule = {"pattern": "^bo-ley-\\d+$", "authority_type": "not_a_type"}
        with self.assertRaisesMessage(ValueError, "not in "):
            apc.iter_shape_rules({"shape_rules": [rule]})

    def test_valid_entry_normalised(self):
        rule = {"pattern": "^bo-ley-\\d+$", "authority_type": "statute"}
        normalised = apc.iter_shape_rules({"shape_rules": [rule]})
        self.assertEqual(len(normalised), 1)
        first = normalised[0]
        self.assertEqual(first["authority_type"], "statute")
        # ``jurisdiction`` was not declared, so it is normalised to None.
        self.assertIsNone(first["jurisdiction"])
72+
73+
74+
class IterAbbreviationsValidationTests(SimpleTestCase):
    """Malformed ``abbreviations`` declarations make ``iter_abbreviations`` raise.

    Each test feeds one malformed shape and expects ``ValueError`` with a
    recognisable message; the remaining tests pin the empty and valid cases.
    """

    def test_none_returns_empty_groups(self):
        # With nothing declared, both groups are present but empty.
        empty_groups = {"state": {}, "municipal": {}}
        self.assertEqual(apc.iter_abbreviations({}), empty_groups)

    def test_not_a_mapping_raises(self):
        config = {"abbreviations": ["nope"]}
        with self.assertRaisesMessage(ValueError, "must be a mapping"):
            apc.iter_abbreviations(config)

    def test_group_not_a_mapping_raises(self):
        config = {"abbreviations": {"state": ["nope"]}}
        with self.assertRaisesMessage(ValueError, "must be a mapping"):
            apc.iter_abbreviations(config)

    def test_entry_without_prefix_raises(self):
        state_group = {"Bol. Civ. Code": {"jurisdiction": "bo"}}
        with self.assertRaisesMessage(ValueError, "needs 'prefix'"):
            apc.iter_abbreviations({"abbreviations": {"state": state_group}})

    def test_unknown_authority_type_raises(self):
        entry = {"prefix": "bo-civ", "authority_type": "not_a_type"}
        config = {"abbreviations": {"state": {"Bol. Civ. Code": entry}}}
        with self.assertRaisesMessage(ValueError, "not in "):
            apc.iter_abbreviations(config)

    def test_valid_entry_normalised(self):
        entry = {"prefix": "bo-ord", "authority_type": "statute"}
        config = {"abbreviations": {"municipal": {"Some Ord.": entry}}}
        groups = apc.iter_abbreviations(config)
        self.assertEqual(groups["municipal"]["Some Ord."]["prefix"], "bo-ord")
        # The undeclared group is still present, just empty.
        self.assertEqual(groups["state"], {})
121+
122+
123+
class LoadYamlTests(SimpleTestCase):
    """``_load_yaml`` degrades bad / non-mapping YAML to ``{}`` without raising.

    Also smoke-tests ``validate_pack_taxonomy_extensions`` against a valid
    mappings file written to a temporary directory.
    """

    @staticmethod
    def _write_mapping(directory: str, body: str) -> Path:
        """Write *body* to ``m.yaml`` inside *directory* and return its path."""
        target = Path(directory) / "m.yaml"
        target.write_text(body, encoding="utf-8")
        return target

    def test_malformed_yaml_returns_empty(self):
        with tempfile.TemporaryDirectory() as workdir:
            mapping = self._write_mapping(workdir, _BAD_YAML)
            self.assertEqual(apc._load_yaml(mapping), {})

    def test_non_mapping_yaml_returns_empty(self):
        # Parseable YAML whose top level is a list, not a mapping.
        with tempfile.TemporaryDirectory() as workdir:
            mapping = self._write_mapping(workdir, "- just\n- a\n- list\n")
            self.assertEqual(apc._load_yaml(mapping), {})

    def test_validate_pack_taxonomy_extensions_passes_for_valid(self):
        with tempfile.TemporaryDirectory() as workdir:
            mapping = self._write_mapping(
                workdir, "shape_rules:\n - pattern: '^bo-ley-\\d+$'\n"
            )
            # Should not raise.
            apc.validate_pack_taxonomy_extensions(mapping)
146+
147+
148+
class IterPackMappingFilesSkipTests(SimpleTestCase):
    """Unusable packs are dropped by ``_iter_pack_mapping_files``, never raised on."""

    def _patch_dirs(self, *dirs: Path):
        """Return a patcher that makes ``apc.authority_pack_dirs`` return *dirs*."""
        replacement = list(dirs)
        return mock.patch.object(
            apc, "authority_pack_dirs", return_value=replacement
        )

    def test_pack_without_manifest_is_skipped(self):
        with tempfile.TemporaryDirectory() as workdir:
            pack_dir = Path(workdir) / "no-manifest"
            pack_dir.mkdir()
            with self._patch_dirs(pack_dir):
                found = list(apc._iter_pack_mapping_files())
                self.assertEqual(found, [])

    def test_pack_with_malformed_manifest_is_skipped(self):
        with tempfile.TemporaryDirectory() as workdir:
            pack_dir = Path(workdir) / "bad-manifest"
            pack_dir.mkdir()
            manifest = pack_dir / "pack.yaml"
            manifest.write_text(_BAD_YAML, encoding="utf-8")
            # The skip must also be logged at WARNING (or above) on _MODULE.
            with self._patch_dirs(pack_dir), self.assertLogs(_MODULE, level="WARNING"):
                found = list(apc._iter_pack_mapping_files())
                self.assertEqual(found, [])

    def test_pack_without_mappings_key_is_skipped(self):
        with tempfile.TemporaryDirectory() as workdir:
            pack_dir = Path(workdir) / "no-mappings"
            pack_dir.mkdir()
            manifest = pack_dir / "pack.yaml"
            manifest.write_text("name: x\n", encoding="utf-8")
            with self._patch_dirs(pack_dir):
                found = list(apc._iter_pack_mapping_files())
                self.assertEqual(found, [])

    def test_well_formed_pack_is_yielded(self):
        with tempfile.TemporaryDirectory() as workdir:
            pack_dir = Path(workdir) / "ok"
            pack_dir.mkdir()
            manifest = pack_dir / "pack.yaml"
            manifest.write_text("name: x\nmappings: m.yaml\n", encoding="utf-8")
            mappings = pack_dir / "m.yaml"
            mappings.write_text("shape_rules: []\n", encoding="utf-8")
            with self._patch_dirs(pack_dir):
                yielded = list(apc._iter_pack_mapping_files())
                # Each yielded item is a pair; only the first element is checked.
                pack_paths = [pack_path for pack_path, _ in yielded]
                self.assertEqual(pack_paths, [pack_dir])
189+
190+
191+
class RuntimeScanFaultIsolationTests(SimpleTestCase):
    """Packs with malformed mappings are logged and skipped by the runtime scan."""

    def setUp(self):
        # Reset the pack-config cache once each test has finished.
        self.addCleanup(apc.reset_pack_config_cache)

    def _patch_dirs(self, *dirs: Path):
        """Return a patcher that makes ``apc.authority_pack_dirs`` return *dirs*."""
        replacement = list(dirs)
        return mock.patch.object(
            apc, "authority_pack_dirs", return_value=replacement
        )

    @staticmethod
    def _write_pack(root: Path, mappings_body: str) -> Path:
        """Create a ``broken-vocab`` pack under *root* and return its directory.

        The manifest points at ``m.yaml``, which receives *mappings_body*.
        """
        pack_dir = root / "broken-vocab"
        pack_dir.mkdir()
        manifest = pack_dir / "pack.yaml"
        manifest.write_text("name: x\nmappings: m.yaml\n", encoding="utf-8")
        mappings = pack_dir / "m.yaml"
        mappings.write_text(mappings_body, encoding="utf-8")
        return pack_dir

    def test_malformed_shape_rules_skipped_not_raised(self):
        with tempfile.TemporaryDirectory() as workdir:
            # shape_rules is a string, not a list -> iter_shape_rules raises;
            # the scan must downgrade to a logged skip and return no rules.
            pack_dir = self._write_pack(Path(workdir), "shape_rules: not-a-list\n")
            with self._patch_dirs(pack_dir):
                apc.reset_pack_config_cache()
                with self.assertLogs(_MODULE, level="WARNING"):
                    declared = apc.pack_declared_shape_rules()
                self.assertEqual(declared, ())

    def test_malformed_abbreviations_skipped_not_raised(self):
        with tempfile.TemporaryDirectory() as workdir:
            # abbreviations is a string, not a mapping; both groups come back empty.
            pack_dir = self._write_pack(
                Path(workdir), "abbreviations: not-a-mapping\n"
            )
            with self._patch_dirs(pack_dir):
                apc.reset_pack_config_cache()
                with self.assertLogs(_MODULE, level="WARNING"):
                    state_group, municipal_group = apc.pack_declared_abbreviations()
                self.assertEqual((state_group, municipal_group), ({}, {}))

opencontractserver/tests/test_authority_pack_providers.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717
from django.test import SimpleTestCase, override_settings
1818

1919
from opencontractserver.pipeline.registry import (
20+
authority_pack_dirs,
2021
get_all_authority_source_providers_cached,
2122
reset_registry,
2223
)
2324

25+
# Logger name handed to ``assertLogs`` in the tests below; it is the same
# dotted path the registry helpers are imported from.
_REGISTRY_LOGGER = "opencontractserver.pipeline.registry"
26+
2427
# A minimal, importable provider shipped "inside a pack". Imported by file path
2528
# under a synthetic module name, so its real OpenContracts imports must resolve.
2629
_DEMO_PROVIDER_SRC = """
@@ -91,3 +94,61 @@ def test_provider_absent_without_pack_path(self):
9194
self.assertNotIn("DemoPackProvider", names)
9295
# The shipped core providers are still discovered.
9396
self.assertIn("CFRAuthoritySourceProvider", names)
97+
98+
def test_broken_pack_provider_is_logged_and_skipped(self):
    # One provider module that blows up at import time must be logged and
    # skipped; the pack's other (valid) providers are still discovered, so a
    # single bad file never crashes the registry build.
    with tempfile.TemporaryDirectory() as workdir:
        pack_dir = self._write_pack(Path(workdir))
        broken_module = pack_dir / "providers" / "broken.py"
        broken_module.write_text(
            "raise RuntimeError('boom in pack provider')\n", encoding="utf-8"
        )
        with override_settings(AUTHORITY_PACK_PATHS=[str(pack_dir)]):
            reset_registry()
            with self.assertLogs(_REGISTRY_LOGGER, level="WARNING") as captured:
                providers = get_all_authority_source_providers_cached()
                names = {provider.name for provider in providers}
            # The valid sibling still loads despite the broken module.
            self.assertIn("DemoPackProvider", names)
            import_failure_logged = any(
                "Failed to import pack provider" in line for line in captured.output
            )
            self.assertTrue(import_failure_logged, captured.output)
119+
120+
def test_duplicate_provider_prefix_is_warned(self):
    # Two providers claiming the same supported_prefixes family resolve
    # non-deterministically; the registry makes the shadowing install loud.
    with tempfile.TemporaryDirectory() as workdir:
        pack_dir = self._write_pack(Path(workdir))
        # Same provider source under a different class name.
        duplicate_source = _DEMO_PROVIDER_SRC.replace(
            "DemoPackProvider", "DupPackProvider"
        )
        duplicate_module = pack_dir / "providers" / "dup_provider.py"
        duplicate_module.write_text(duplicate_source, encoding="utf-8")
        with override_settings(AUTHORITY_PACK_PATHS=[str(pack_dir)]):
            reset_registry()
            with self.assertLogs(_REGISTRY_LOGGER, level="WARNING") as captured:
                get_all_authority_source_providers_cached()
            duplicate_warned = any(
                "Duplicate authority-source-provider prefix" in line
                for line in captured.output
            )
            self.assertTrue(duplicate_warned, captured.output)
139+
140+
141+
class AuthorityPackDirsTests(SimpleTestCase):
    """A misconfigured settings entry must not make ``authority_pack_dirs`` raise."""

    def setUp(self):
        # Reset the registry once each test has finished.
        self.addCleanup(reset_registry)

    def test_non_directory_path_entry_is_warned_and_skipped(self):
        bogus_entry = "/no/such/authority/pack/dir"
        with override_settings(AUTHORITY_PACK_PATHS=[bogus_entry]):
            with self.assertLogs(_REGISTRY_LOGGER, level="WARNING") as captured:
                dirs = authority_pack_dirs()
            warned = any("is not a directory" in line for line in captured.output)
            self.assertTrue(warned, captured.output)
            # The bogus entry must be absent from the returned directories.
            self.assertNotIn(Path(bogus_entry), dirs)

0 commit comments

Comments
 (0)