Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pipeline/src/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,14 @@ def __init__(self, **properties):
class Link:
    """Representation of a metadata node for which only the identifier is currently known."""

    def __init__(self, identifier, allowed_types=None):
        """
        Args:
            identifier: the "@id" of the node being referenced.
            allowed_types: optional collection of types the referenced node
                may have. None (the default) means no type information
                is available for this link.
        """
        self.identifier = identifier
        self.allowed_types = allowed_types

    def __repr__(self):
        return (
            f"{type(self).__name__}({self.identifier!r}, "
            f"allowed_types={self.allowed_types!r})"
        )

    def to_jsonld(self):
        """Return the JSON-LD form of the link: a dict containing only "@id"."""
        # allowed_types is deliberately not serialized; only the identifier is emitted.
        return {
            "@id": self.identifier
        }

class IRI:
"""
Expand Down
2 changes: 1 addition & 1 deletion pipeline/src/init_template.py.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ openMINDS Python package

__version__ = "{{version}}"

from .base import Node, EmbeddedMetadata, LinkedMetadata, IRI
from .base import Node, EmbeddedMetadata, LinkedMetadata, IRI, Link
from .collection import Collection
from .properties import Property
33 changes: 27 additions & 6 deletions pipeline/src/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@
from typing import Optional, Union, Iterable

from .registry import lookup
from .base import Node, IRI, Link, Node
from .base import Node, IRI, Link


def _could_be_instance(value, types):
    """
    True if `value` is a Link whose allowed types are consistent with the given types

    Args:
        value: the object being checked.
        types: the collection of types to compare against.

    Returns:
        bool: True only when `value` is a Link with a non-empty
        `allowed_types`, every member of which is contained in `types`.
        A Link carrying no type information gives False.
    """
    # Guard first so the function always returns a real bool, never the
    # falsy `allowed_types` object itself (None or an empty collection).
    if not isinstance(value, Link) or not value.allowed_types:
        return False
    return set(value.allowed_types).issubset(types)


class Property:
Expand Down Expand Up @@ -89,6 +96,10 @@ def types(self):
self._resolved_types = True
return self._types

@property
def is_link(self) -> bool:
    """Whether the first entry of `self.types` is a subclass of Node."""
    first_type = self.types[0]
    return issubclass(first_type, Node)

def validate(self, value, ignore=None):
"""
Check whether `value` satisfies all constraints.
Expand All @@ -113,7 +124,7 @@ def validate(self, value, ignore=None):
if not isinstance(value, (list, tuple)):
value = [value]
for item in value:
if not isinstance(item, self.types):
if not (isinstance(item, self.types) or _could_be_instance(item, self.types)):
if "type" not in ignore:
failures["type"].append(
f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, "
Expand Down Expand Up @@ -145,7 +156,7 @@ def validate(self, value, ignore=None):
failures["multiplicity"].append(
f"{self.name} does not accept multiple values, but contains {len(value)}"
)
elif not isinstance(value, self.types):
elif not (isinstance(value, self.types) or _could_be_instance(value, self.types)):
if "type" not in ignore:
failures["type"].append(
f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, "
Expand All @@ -163,8 +174,9 @@ def deserialize(self, data):
Args:
data: the JSON-LD data
"""

# todo: check data type
link_keys = set(("@id", "@type"))

def deserialize_item(item):
if self.types == (str,):
if self.formatting != "text/plain":
Expand All @@ -188,9 +200,18 @@ def deserialize_item(item):
if "@type" in item:
for cls in self.types:
if cls.type_ == item["@type"]:
return cls.from_jsonld(item)
if set(item.keys()) == link_keys:
# if we only have @id and @type, it's a Link
return Link(item["@id"], allowed_types=[cls])
else:
# otherwise it's a Node
return cls.from_jsonld(item)
raise TypeError(
f"Mismatched types. Data has '{item['@type']}' "
f"but property only allows {[cls.type_ for cls in self.types]}"
)
else:
return Link(item["@id"])
return Link(item["@id"], allowed_types=self.types)
else:
raise NotImplementedError()

Expand Down
51 changes: 48 additions & 3 deletions pipeline/tests/test_instantiation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from openminds.base import Node, IRI
from openminds.base import Node, IRI, Link
from openminds.latest import (
chemicals,
computation,
Expand Down Expand Up @@ -33,7 +33,9 @@

def classes_in_module(module):
    """
    Return the classes exposed by `module` that satisfy issubclass(item, Node).

    Every attribute listed by dir(module) is inspected; non-class attributes
    are skipped before the issubclass check, which would otherwise raise
    TypeError for them.
    """
    contents = [getattr(module, name) for name in dir(module)]
    return [
        item for item in contents if isinstance(item, type) and issubclass(item, Node)
    ]


def test_instantiation_random_data():
Expand All @@ -55,7 +57,10 @@ def test_json_roundtrip():


def test_IRI():
valid_iris = ["https://example.com/path/to/my/file.txt", "file:///path/to/my/file.txt"]
valid_iris = [
"https://example.com/path/to/my/file.txt",
"file:///path/to/my/file.txt",
]
for value in valid_iris:
iri = IRI(value)
assert iri.value == value
Expand All @@ -69,3 +74,43 @@ def test_IRI():
with pytest.raises(ValueError) as exc_info:
iri = IRI(value)
assert exc_info.value.args[0] == "Invalid IRI"


def test_link():
from openminds.v4.controlled_terms import Species
from openminds.v4.core import DatasetVersion

maybe_mouse = Link("https://openminds.om-i.org/instances/species/musMusculus")

definitely_mouse = Link(
"https://openminds.om-i.org/instances/species/musMusculus",
allowed_types=[Species],
)
Comment on lines +85 to +93

@Raphael-Gazzotti Raphael-Gazzotti Jul 31, 2025

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to avoid hardcoding the version and namespace of the instances.

I would also move:
"definitely_mouse = Link(
"https://openminds.om-i.org/instances/species/musMusculus",
allowed_types=[Species],
)"

just on top of "my_dsv2 = DatasetVersion(study_targets=[definitely_mouse])"

@apdavison apdavison Jul 31, 2025

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"better to avoid hardcoding" - do you mean the test should iterate over all versions, and introspect the namespace from the version?

"would also move" - I think this is a matter of taste. I think that by grouping the Link creation on consecutive lines it makes the test easier to understand.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes to the first option, because otherwise the test will need to be adjusted for each new version (v5.0 and above).
The second point seems fair.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't need to be adjusted for new versions; the test would work with any openMINDS version. I chose v4 for no particular reason.


my_dsv1 = DatasetVersion(study_targets=[maybe_mouse])
failures1 = my_dsv1.validate(ignore=["required"])
assert len(failures1["type"]) == 1
assert "study_targets" in failures1["type"][0]

my_dsv2 = DatasetVersion(study_targets=[definitely_mouse])
failures2 = my_dsv2.validate(ignore=["required"])
assert len(failures2) == 0

expected = {
"@context": {
"@vocab": "https://openminds.om-i.org/props/",
},
"@type": "https://openminds.om-i.org/types/DatasetVersion",
"studyTarget": [
{
"@id": "https://openminds.om-i.org/instances/species/musMusculus",
},
],
}
assert my_dsv1.to_jsonld(
include_empty_properties=False,
embed_linked_nodes=False
) == my_dsv2.to_jsonld(
include_empty_properties=False,
embed_linked_nodes=False
) == expected