Skip to content

Commit f0e2d1a

Browse files
committed
Address PR #2050 review: test constants, mark() guard, can_handle contract
Respond to the automated review on PR #2050: - Replace the bare discovery-state literals in the three new regression tests with the C.DISCOVERY_STATE_* constants, so the new tests follow the same single-source-of-truth the PR establishes (no within-method "ingested" constant vs "failed"/"queued" literal mix). - mark(): add a guard rejecting error= alongside a terminal SUCCESS state, mirroring the existing exclusivity guards. This makes the "an ingested row carries no error" invariant explicit rather than relying on no caller ever passing the contradiction. - Document on BaseAuthoritySourceProvider.can_handle that overrides MUST be stateless — the orchestrator now probes one instance with multiple candidate keys and reuses it for locate/fetch, so the purity the optimisation relies on is codified for future provider authors. Declined the list->tuple nit for DISCOVERY_STATE_CHOICES: the field's choices match migration 0100's recorded list, and no code mutates it, so keeping the list avoids any migration-state churn for a marginal immutability gain.
1 parent 17d2ae4 commit f0e2d1a

3 files changed

Lines changed: 31 additions & 6 deletions

File tree

opencontractserver/enrichment/services/authority_frontier_service.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,14 @@ def mark(
277277
raise ValueError("mark(): error and clear_error are exclusive.")
278278
if set_provider is not None and clear_provider:
279279
raise ValueError("mark(): set_provider and clear_provider are exclusive.")
280+
if error is not None and state in C.DISCOVERY_SUCCESS_STATES:
281+
# A terminal SUCCESS row must carry no error (the invariant the
282+
# last_error auto-clear below establishes); refuse the contradiction
283+
# loudly rather than persist an "ingested but errored" row.
284+
raise ValueError(
285+
"mark(): cannot set error on a terminal success transition "
286+
f"({state!r}); omit error= (success states clear last_error)."
287+
)
280288

281289
row.discovery_state = state
282290
row.last_attempt = timezone.now()

opencontractserver/pipeline/base/base_authority_source_provider.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ class BaseAuthoritySourceProvider(PipelineComponentBase, ABC):
5555

5656
# ---- public API (registry/orchestrator calls these) ---------------------
5757
def can_handle(self, canonical_key: str) -> bool:
58+
"""Return True if this provider serves *canonical_key*.
59+
60+
MUST be stateless. The discovery orchestrator instantiates each provider
61+
once per ``_provider_for`` call and probes the SAME instance with multiple
62+
candidate keys (then reuses it for ``locate``/``fetch``), so an override
63+
that stashed match state on ``self`` here would leak stale state across
64+
keys. Derive everything ``locate``/``fetch`` need from the key passed to
65+
them instead.
66+
"""
5867
prefix = canonical_key.split(":", 1)[0]
5968
return prefix in self.supported_prefixes
6069

opencontractserver/tests/test_authority_frontier.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def test_dequeue_for_provider_excludes_unprovidered_rows(self):
339339
AuthorityFrontier.objects.create(
340340
canonical_key="usc-15:78j-noprov",
341341
authority="usc-15",
342-
discovery_state="queued",
342+
discovery_state=C.DISCOVERY_STATE_QUEUED,
343343
provider=None,
344344
mention_count=10,
345345
)
@@ -453,15 +453,19 @@ def test_mark_ingested_clears_stale_error_from_prior_failure(self):
453453
caller does NOT pass clear_error=True — ``mark()`` clears it implicitly on
454454
the SUCCESS transition (C.DISCOVERY_SUCCESS_STATES).
455455
"""
456-
row = self._make_row("usc-15:78j-retry", provider="TestProv", state="failed")
457-
AuthorityFrontierService.mark(row, "failed", error="timeout fetching USLM")
456+
row = self._make_row(
457+
"usc-15:78j-retry", provider="TestProv", state=C.DISCOVERY_STATE_FAILED
458+
)
459+
AuthorityFrontierService.mark(
460+
row, C.DISCOVERY_STATE_FAILED, error="timeout fetching USLM"
461+
)
458462
row.refresh_from_db()
459463
self.assertEqual(row.last_error, "timeout fetching USLM")
460464

461465
# Successful retry, no explicit clear_error.
462466
AuthorityFrontierService.mark(row, C.DISCOVERY_STATE_INGESTED)
463467
row.refresh_from_db()
464-
self.assertEqual(row.discovery_state, "ingested")
468+
self.assertEqual(row.discovery_state, C.DISCOVERY_STATE_INGESTED)
465469
self.assertIsNone(row.last_error)
466470

467471
def test_mark_non_success_state_preserves_error(self):
@@ -471,9 +475,13 @@ def test_mark_non_success_state_preserves_error(self):
471475
error message keeps that message so operators can see why it failed.
472476
"""
473477
row = self._make_row(
474-
"usc-15:78j-stillbad", provider="TestProv", state="in_progress"
478+
"usc-15:78j-stillbad",
479+
provider="TestProv",
480+
state=C.DISCOVERY_STATE_IN_PROGRESS,
481+
)
482+
AuthorityFrontierService.mark(
483+
row, C.DISCOVERY_STATE_FAILED, error="still broken"
475484
)
476-
AuthorityFrontierService.mark(row, "failed", error="still broken")
477485
row.refresh_from_db()
478486
self.assertEqual(row.last_error, "still broken")
479487

0 commit comments

Comments
 (0)