Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 65 additions & 3 deletions opencontractserver/enrichment/services/corpus_reference_service.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""Read surface for ``CorpusReference`` rows.

Visibility derives from the readable parent corpus plus the readable source
and target objects carried by each row. ``CorpusReference`` carries no
per-object guardian rows in v1.
"""

from __future__ import annotations

from django.db.models import Q

from opencontractserver.annotations.models import CorpusReference
from opencontractserver.corpuses.models import Corpus
from opencontractserver.documents.models import Document
from opencontractserver.enrichment import constants as C
from opencontractserver.shared.services.base import BaseService

Expand All @@ -16,15 +20,73 @@ class CorpusReferenceService(BaseService):
"""Read surface for CorpusReference rows."""

@staticmethod
def visible_to_user_by_source(user):
    """References whose parent corpus AND source document are visible.

    Enforces corpus READ and source-annotation-document visibility, but
    does NOT filter on the resolved *target* (document / corpus /
    annotation). A citation made by a hidden document is suppressed (no
    source leak), but a citation TO a hidden target is RETAINED so the
    caller can degrade that target to a ghost rather than dropping the
    reference outright.

    Use this for aggregate surfaces that perform their own per-target
    ghosting (the governance graph re-checks both endpoints and degrades
    an invisible target to an external key node). For surfaces that expose
    the target foreign keys directly (e.g. the ``corpusReferences``
    GraphQL query), use :meth:`visible_to_user`, which additionally hides
    references whose target is invisible.

    Returns the ``CorpusReference`` rows matching both conditions, as
    produced by ``CorpusReference.objects.filter``.
    """
    # Resolve each visibility set once and reuse it in the filter below.
    visible_corpora = Corpus.objects.visible_to_user(user)
    visible_documents = Document.objects.visible_to_user(user)

    # Both conditions must hold: a readable corpus alone is not enough to
    # expose a citation that originates in a document the user cannot read.
    return CorpusReference.objects.filter(
        corpus__in=visible_corpora,
        source_annotation__document__in=visible_documents,
    )

@staticmethod
def visible_to_user(user):
    """Return only references whose exposed graph is visible to ``user``.

    A reference row hangs off a corpus but also carries foreign keys to a
    source annotation and to an optional target document, target corpus
    and target annotation. Reading the corpus must not be enough to reveal
    any of those, so this applies the same
    MIN(document_permission, corpus_permission) rule used by user-facing
    corpus document surfaces.

    The corpus + source checks come from
    :meth:`visible_to_user_by_source`; this method layers the target
    checks on top. Each target is accepted when it is either unset or
    visible. Callers that ghost invisible targets themselves should use
    :meth:`visible_to_user_by_source` directly, otherwise those references
    are dropped before they can be degraded.
    """
    readable_corpora = Corpus.objects.visible_to_user(user)
    readable_documents = Document.objects.visible_to_user(user)

    # One predicate per target foreign key: "no target" OR "target readable".
    document_target_ok = Q(target_document__isnull=True) | Q(
        target_document__in=readable_documents
    )
    corpus_target_ok = Q(target_corpus__isnull=True) | Q(
        target_corpus__in=readable_corpora
    )
    # An annotation target is judged by the document it belongs to.
    annotation_target_ok = Q(target_annotation__isnull=True) | Q(
        target_annotation__document__in=readable_documents
    )

    source_visible = CorpusReferenceService.visible_to_user_by_source(user)
    return source_visible.filter(
        document_target_ok & corpus_target_ok & annotation_target_ok
    )

@classmethod
def for_corpus(cls, user, corpus_id: int):
    """Strictly visible references (see :meth:`visible_to_user`) of one corpus."""
    strictly_visible = cls.visible_to_user(user)
    return strictly_visible.filter(corpus_id=corpus_id)

@classmethod
def for_corpus_by_source(cls, user, corpus_id: int):
    """Narrow :meth:`visible_to_user_by_source` to a single corpus.

    Intended for callers (the governance graph) that ghost invisible
    targets on their own, and therefore need references with a hidden
    target to still be present in the result.
    """
    source_visible = cls.visible_to_user_by_source(user)
    return source_visible.filter(corpus_id=corpus_id)

@classmethod
def wanted_authorities(
cls,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@
Visibility rules:

* The source corpus must be READ-visible or the build returns ``None``.
* Reference rows are sourced via
  ``CorpusReferenceService.for_corpus_by_source`` — corpus READ plus a visible
  source document, with the resolved *target* deliberately NOT pre-filtered so
  this build can apply its own per-endpoint rule: invisible source documents
  drop their edges entirely; invisible target documents degrade to external
  ghost nodes so titles never leak. (The strict ``visible_to_user`` is reserved
  for surfaces that expose target FKs directly, e.g. the GraphQL
  ``corpusReferences`` query.)
* Only READ-visible target corpora are listed in ``corpora``.

The service returns plain data keyed by raw PKs / canonical keys; the GraphQL
Expand Down Expand Up @@ -71,7 +75,7 @@ def build(
return None

ref_rows = list(
CorpusReferenceService.for_corpus(user, corpus_pk)
CorpusReferenceService.for_corpus_by_source(user, corpus_pk)
.filter(reference_type=C.REF_LAW)
.exclude(canonical_key=None)
.values_list(
Expand Down
124 changes: 124 additions & 0 deletions opencontractserver/tests/test_corpus_reference_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from opencontractserver.corpuses.models import Corpus
from opencontractserver.documents.models import Document
from opencontractserver.enrichment import constants as C
from opencontractserver.enrichment.services import CorpusReferenceService

User = get_user_model()

Expand Down Expand Up @@ -114,3 +115,126 @@ def test_distinct_keyed_references_still_allowed(self):
creator=self.user,
)
assert CorpusReference.objects.filter(source_annotation=mention).count() == 2


class CorpusReferenceVisibilityTests(TestCase):
    """Visibility contract of the ``CorpusReferenceService`` read querysets.

    Fixtures: a public corpus owned by ``owner``, one public document, two
    private documents (one used as a citation source, one as a target) and a
    private corpus used as a target. Every check is made as ``viewer``, a
    second user who did not create any of the fixtures.
    """

    def setUp(self):
        self.owner = User.objects.create_user(username="owner2", password="p")
        self.viewer = User.objects.create_user(username="viewer", password="p")

        owned = {"creator": self.owner}
        self.corpus = Corpus.objects.create(
            title="Readable Corpus", is_public=True, **owned
        )
        self.visible_doc = Document.objects.create(
            title="Visible Doc", is_public=True, **owned
        )
        self.private_source_doc = Document.objects.create(
            title="Private Source", is_public=False, **owned
        )
        self.private_target_doc = Document.objects.create(
            title="Private Target", is_public=False, **owned
        )
        self.private_target_corpus = Corpus.objects.create(
            title="Private Authority", is_public=False, **owned
        )
        self.label = self.corpus.ensure_label_and_labelset(
            label_text=C.LABEL_REF_LAW,
            creator_id=self.owner.id,
            label_type=SPAN_LABEL,
        )

    # -- fixture builders ---------------------------------------------------

    def _mention(self, document: Document, start: int = 0) -> Annotation:
        """Create a 7-character span annotation on ``document`` in the corpus."""
        span = {"start": start, "end": start + 7}
        return Annotation.objects.create(
            raw_text="mention",
            page=1,
            json=span,
            annotation_label=self.label,
            document_id=document.id,
            corpus=self.corpus,
            creator=self.owner,
            annotation_type=SPAN_LABEL,
        )

    def _reference(self, mention: Annotation, **kwargs) -> CorpusReference:
        """Create a reference sourced at ``mention``; ``kwargs`` override defaults."""
        fields = {
            "corpus": self.corpus,
            "reference_type": C.REF_LAW,
            "source_annotation": mention,
            "canonical_key": f"dgcl:{mention.id}",
            "resolution_status": C.STATUS_EXTERNAL,
            "creator": self.owner,
            **kwargs,
        }
        return CorpusReference.objects.create(**fields)

    # -- visibility probes --------------------------------------------------

    def _strict_sees(self, ref: CorpusReference) -> bool:
        """Whether ``visible_to_user`` returns ``ref`` for the viewer."""
        strict = CorpusReferenceService.visible_to_user(self.viewer)
        return strict.filter(pk=ref.pk).exists()

    def _by_source_sees(self, ref: CorpusReference) -> bool:
        """Whether ``visible_to_user_by_source`` returns ``ref`` for the viewer."""
        by_source = CorpusReferenceService.visible_to_user_by_source(self.viewer)
        return by_source.filter(pk=ref.pk).exists()

    # -- strict surface -----------------------------------------------------

    def test_visible_to_user_requires_visible_source_document(self):
        hidden_source = self._mention(self.private_source_doc)
        ref = self._reference(hidden_source)

        assert not self._strict_sees(ref)

    def test_visible_to_user_requires_visible_target_document(self):
        visible_source = self._mention(self.visible_doc)
        ref = self._reference(
            visible_source, target_document=self.private_target_doc
        )

        assert not self._strict_sees(ref)

    def test_visible_to_user_requires_visible_target_corpus(self):
        visible_source = self._mention(self.visible_doc)
        ref = self._reference(
            visible_source, target_corpus=self.private_target_corpus
        )

        assert not self._strict_sees(ref)

    def test_visible_to_user_returns_reference_when_all_edges_visible(self):
        visible_source = self._mention(self.visible_doc)
        ref = self._reference(visible_source)

        assert self._strict_sees(ref)

    # -- source-only surface ------------------------------------------------

    def test_visible_to_user_by_source_retains_hidden_target_for_ghosting(self):
        # Visible source, hidden target document. The strict queryset (the one
        # behind the ``corpusReferences`` GraphQL surface, which exposes
        # target FKs) must drop the row, while the source-only queryset must
        # keep it so aggregate consumers such as the governance graph can
        # turn the hidden target into a ghost node instead of losing the
        # citation.
        visible_source = self._mention(self.visible_doc)
        ref = self._reference(
            visible_source, target_document=self.private_target_doc
        )

        assert not self._strict_sees(ref)
        assert self._by_source_sees(ref)

    def test_visible_to_user_by_source_still_requires_visible_source(self):
        # Relaxing the target check must not relax source privacy: a citation
        # made by a hidden document stays invisible.
        hidden_source = self._mention(self.private_source_doc)
        ref = self._reference(hidden_source)

        assert not self._by_source_sees(ref)