Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions osf/models/provider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import requests
from jsonschema import validate as jsonschema_validate, ValidationError as JsonSchemaValidationError

from django.apps import apps
from django.contrib.postgres import fields
Expand All @@ -20,6 +21,7 @@
from .brand import Brand
from .citation import CitationStyle
from .licenses import NodeLicense
from .cedar_metadata import CedarMetadataRecord
from .storage import ProviderAssetFile
from .subject import Subject
from osf.utils.datetime_aware_jsonfield import DateTimeAwareJSONField
Expand Down Expand Up @@ -257,6 +259,27 @@ def setup_share_source(self, provider_home_page):

self.save()

def validate_required_metadata(self, osf_obj):
    """Check ``osf_obj`` against this provider's required CEDAR metadata template.

    Does nothing when the provider has no ``required_metadata_template``.
    Otherwise a published ``CedarMetadataRecord`` for that template must be
    attached to one of the object's guids, and its ``metadata`` must validate
    against the template's JSON schema.

    :param osf_obj: object exposing a ``guids`` related manager.
    :raises ValidationError: if no published record exists for the required
        template, or if the record's metadata fails schema validation.
    """
    required_template = self.required_metadata_template
    if not required_template:
        return

    record = CedarMetadataRecord.objects.filter(
        guid__in=osf_obj.guids.all(),
        template=required_template,
        is_published=True,
    ).first()

    if record is None:
        raise ValidationError(
            f'Object must have a published CEDAR metadata record for the required template '
            f'"{required_template.schema_name}".'
        )

    try:
        jsonschema_validate(record.metadata, required_template.template)
    except JsonSchemaValidationError as e:
        # Chain explicitly so the jsonschema error is recorded as the direct cause.
        raise ValidationError(e.message) from e


class CollectionProvider(AbstractProvider):
DEFAULT_SUBSCRIPTIONS = [
Expand Down
120 changes: 120 additions & 0 deletions osf_tests/test_validate_required_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import pytest
from faker import Faker
from django.core.exceptions import ValidationError

from osf.models import CedarMetadataRecord, CedarMetadataTemplate
from osf_tests.factories import AuthUserFactory, PreprintFactory, PreprintProviderFactory

# Shared Faker instance; supplies the random schema names and ids for the templates created below.
fake = Faker()

# Minimal draft-07 schema used as the template body in these tests:
# an object whose only declared property, ``title``, is a required string.
VALID_JSONSCHEMA = {
    '$schema': 'http://json-schema.org/draft-07/schema#',
    'type': 'object',
    'properties': {
        'title': {'type': 'string'},
    },
    'required': ['title'],
}


@pytest.fixture()
def user():
    """Provide a user built by ``AuthUserFactory``."""
    account = AuthUserFactory()
    return account


@pytest.fixture()
def provider():
    """Provide a preprint provider built by ``PreprintProviderFactory``."""
    preprint_provider = PreprintProviderFactory()
    return preprint_provider


@pytest.fixture()
def cedar_template():
    """Provide an active CEDAR template whose body is ``VALID_JSONSCHEMA``."""
    template_fields = {
        'schema_name': fake.bs(),
        'cedar_id': fake.md5(),
        'template_version': 1,
        'template': VALID_JSONSCHEMA,
        'active': True,
    }
    return CedarMetadataTemplate.objects.create(**template_fields)


@pytest.fixture()
def preprint(user, provider):
    """Provide a preprint created by ``user`` under ``provider``."""
    factory_kwargs = {'creator': user, 'provider': provider}
    return PreprintFactory(**factory_kwargs)


@pytest.mark.django_db
class TestValidateRequiredMetadata:

def test_no_required_template_passes(self, provider, preprint):
    """Validation completes without raising when the provider requires no template."""
    required = provider.required_metadata_template
    assert required is None
    provider.validate_required_metadata(preprint)

def test_missing_record_raises(self, provider, cedar_template, preprint):
provider.required_metadata_template = cedar_template
provider.save()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, as I'm not familiar with this functionality yet: shouldn't we call validate_required_metadata during the save call, to prevent saving a CEDAR template when no matching CEDAR metadata record exists?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method validates a specific osf_obj (preprint/node) against the provider's required template — not the provider itself. It's intended to be called at publish/submission time for a given object (e.g. before a preprint goes public). Calling it in Provider.save() wouldn't make sense since there's no single object to validate there — a provider may have thousands of associated preprints.


with pytest.raises(ValidationError, match='published CEDAR metadata record'):
provider.validate_required_metadata(preprint)

def test_unpublished_record_raises(self, provider, cedar_template, preprint):
    """An unpublished record for the required template still fails validation."""
    provider.required_metadata_template = cedar_template
    provider.save()

    draft_fields = dict(
        guid=preprint.guids.first(),
        template=cedar_template,
        metadata={'title': 'My Preprint'},
        is_published=False,
    )
    CedarMetadataRecord.objects.create(**draft_fields)

    expected_message = 'published CEDAR metadata record'
    with pytest.raises(ValidationError, match=expected_message):
        provider.validate_required_metadata(preprint)

def test_published_valid_record_passes(self, provider, cedar_template, preprint):
    """A published record whose metadata satisfies the schema validates without raising."""
    provider.required_metadata_template = cedar_template
    provider.save()

    published_fields = dict(
        guid=preprint.guids.first(),
        template=cedar_template,
        metadata={'title': 'My Preprint'},
        is_published=True,
    )
    CedarMetadataRecord.objects.create(**published_fields)

    provider.validate_required_metadata(preprint)

def test_published_invalid_record_raises(self, provider, cedar_template, preprint):
    """A published record whose metadata violates the schema is rejected."""
    provider.required_metadata_template = cedar_template
    provider.save()

    CedarMetadataRecord.objects.create(
        guid=preprint.guids.first(),
        template=cedar_template,
        metadata={'title': 123},  # schema declares ``title`` as a string
        is_published=True,
    )

    # Match the schema-failure message so this test cannot pass on the
    # unrelated "missing published record" error.
    with pytest.raises(ValidationError, match='is not of type'):
        provider.validate_required_metadata(preprint)

def test_record_for_wrong_template_raises(self, provider, cedar_template, preprint):
    """A published record for a different template does not satisfy the requirement."""
    provider.required_metadata_template = cedar_template
    provider.save()

    unrelated_template_fields = dict(
        schema_name=fake.bs(),
        cedar_id=fake.md5(),
        template_version=1,
        template=VALID_JSONSCHEMA,
        active=True,
    )
    other_template = CedarMetadataTemplate.objects.create(**unrelated_template_fields)

    record_fields = dict(
        guid=preprint.guids.first(),
        template=other_template,
        metadata={'title': 'My Preprint'},
        is_published=True,
    )
    CedarMetadataRecord.objects.create(**record_fields)

    expected_message = 'published CEDAR metadata record'
    with pytest.raises(ValidationError, match=expected_message):
        provider.validate_required_metadata(preprint)
Loading