From 794b597c702805c941d8b8441dc007368692347f Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Fri, 18 Jul 2025 16:54:25 +0200 Subject: [PATCH 1/3] Better support for external links (i.e., to online instances outside the local Collection) - Link objects are now properly serialized in JSON-LD - Links can now be annotated with a list of types that they may be pointing to - this allows collections that contain Links to validate - Link is now directly available in the top-level openminds module. --- pipeline/src/base.py | 7 ++++++- pipeline/src/init_template.py.txt | 2 +- pipeline/src/properties.py | 13 +++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pipeline/src/base.py b/pipeline/src/base.py index 47d4d703..2466cad7 100644 --- a/pipeline/src/base.py +++ b/pipeline/src/base.py @@ -232,9 +232,14 @@ def __init__(self, **properties): class Link: """Representation of a metadata node for which only the identifier is currently known.""" - def __init__(self, identifier): + def __init__(self, identifier, allowed_types=None): self.identifier = identifier + self.allowed_types = allowed_types + def to_jsonld(self): + return { + "@id": self.identifier + } class IRI: """ diff --git a/pipeline/src/init_template.py.txt b/pipeline/src/init_template.py.txt index 1ef601be..653c032a 100644 --- a/pipeline/src/init_template.py.txt +++ b/pipeline/src/init_template.py.txt @@ -6,6 +6,6 @@ openMINDS Python package __version__ = "{{version}}" -from .base import Node, EmbeddedMetadata, LinkedMetadata, IRI +from .base import Node, EmbeddedMetadata, LinkedMetadata, IRI, Link from .collection import Collection from .properties import Property diff --git a/pipeline/src/properties.py b/pipeline/src/properties.py index ba432353..f98d20c2 100644 --- a/pipeline/src/properties.py +++ b/pipeline/src/properties.py @@ -113,7 +113,10 @@ def validate(self, value, ignore=None): if not isinstance(value, (list, tuple)): value = [value] for item in value: - if not isinstance(item, self.types): + if not ( + isinstance(item, self.types) + or (isinstance(item, Link) and item.allowed_types == self.types) + ): if "type" not in ignore: failures["type"].append( f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " @@ -145,7 +148,10 @@ def validate(self, value, ignore=None): failures["multiplicity"].append( f"{self.name} does not accept multiple values, but contains {len(value)}" ) - elif not isinstance(value, self.types): + elif not ( + isinstance(value, self.types) + or (isinstance(value, Link) and value.allowed_types == self.types) + ): if "type" not in ignore: failures["type"].append( f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " @@ -163,7 +169,6 @@ def deserialize(self, data): Args: data: the JSON-LD data """ - # todo: check data type def deserialize_item(item): if self.types == (str,): @@ -190,7 +195,7 @@ def deserialize_item(item): if cls.type_ == item["@type"]: return cls.from_jsonld(item) else: - return Link(item["@id"]) + return Link(item["@id"], allowed_types=self.types) else: raise NotImplementedError() From 3023bf9a80e7abb5dc9d4da21b46379d8131969b Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Wed, 30 Jul 2025 15:08:18 +0200 Subject: [PATCH 2/3] Allow links with known type --- pipeline/src/properties.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pipeline/src/properties.py b/pipeline/src/properties.py index f98d20c2..efa36627 100644 --- a/pipeline/src/properties.py +++ b/pipeline/src/properties.py @@ -10,7 +10,7 @@ from typing import Optional, Union, Iterable from .registry import lookup -from .base import Node, IRI, Link, Node +from .base import Node, IRI, Link class Property: @@ -89,6 +89,10 @@ def types(self): self._resolved_types = True return self._types + @property + def is_link(self) -> bool: + return issubclass(self.types[0], Node) + def validate(self, value, ignore=None): """ Check whether `value` satisfies all constraints. @@ -170,6 +174,8 @@ def deserialize(self, data): data: the JSON-LD data """ # todo: check data type + link_keys = set(("@id", "@type")) + def deserialize_item(item): if self.types == (str,): if self.formatting != "text/plain": @@ -193,7 +199,16 @@ def deserialize_item(item): if "@type" in item: for cls in self.types: if cls.type_ == item["@type"]: - return cls.from_jsonld(item) + if set(item.keys()) == link_keys: + # if we only have @id and @type, it's a Link + return Link(item["@id"], allowed_types=[cls]) + else: + # otherwise it's a Node + return cls.from_jsonld(item) + raise TypeError( + f"Mismatched types. Data has '{item['@type']}' " + f"but property only allows {[cls.type_ for cls in self.types]}" + ) else: return Link(item["@id"], allowed_types=self.types) else: From 580182c463a79eb70350622abef46a99756717ce Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Wed, 30 Jul 2025 22:00:11 +0200 Subject: [PATCH 3/3] Fixes and test --- pipeline/src/properties.py | 17 +++++----- pipeline/tests/test_instantiation.py | 51 ++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/pipeline/src/properties.py b/pipeline/src/properties.py index efa36627..b326a603 100644 --- a/pipeline/src/properties.py +++ b/pipeline/src/properties.py @@ -13,6 +13,13 @@ from .base import Node, IRI, Link +def _could_be_instance(value, types): + """ + True if a Link's allowed types are consistent with the given types + """ + return isinstance(value, Link) and value.allowed_types and set(value.allowed_types).issubset(types) + + class Property: """ Representation of an openMINDS property (a metadata field). @@ -117,10 +124,7 @@ def validate(self, value, ignore=None): if not isinstance(value, (list, tuple)): value = [value] for item in value: - if not ( - isinstance(item, self.types) - or (isinstance(item, Link) and item.allowed_types == self.types) - ): + if not (isinstance(item, self.types) or _could_be_instance(item, self.types)): if "type" not in ignore: failures["type"].append( f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " @@ -152,10 +156,7 @@ def validate(self, value, ignore=None): failures["multiplicity"].append( f"{self.name} does not accept multiple values, but contains {len(value)}" ) - elif not ( - isinstance(value, self.types) - or (isinstance(value, Link) and value.allowed_types == self.types) - ): + elif not (isinstance(value, self.types) or _could_be_instance(value, self.types)): if "type" not in ignore: failures["type"].append( f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " diff --git a/pipeline/tests/test_instantiation.py b/pipeline/tests/test_instantiation.py index 45f85e1b..02c982f0 100644 --- a/pipeline/tests/test_instantiation.py +++ b/pipeline/tests/test_instantiation.py @@ -4,7 +4,7 @@ import pytest -from openminds.base import Node, IRI +from openminds.base import Node, IRI, Link from openminds.latest import ( chemicals, computation, @@ -33,7 +33,9 @@ def classes_in_module(module): contents = [getattr(module, name) for name in dir(module)] - return [item for item in contents if isinstance(item, type) and issubclass(item, Node)] + return [ + item for item in contents if isinstance(item, type) and issubclass(item, Node) + ] def test_instantiation_random_data(): @@ -55,7 +57,10 @@ def test_json_roundtrip(): def test_IRI(): - valid_iris = ["https://example.com/path/to/my/file.txt", "file:///path/to/my/file.txt"] + valid_iris = [ + "https://example.com/path/to/my/file.txt", + "file:///path/to/my/file.txt", + ] for value in valid_iris: iri = IRI(value) assert iri.value == value @@ -69,3 +74,43 @@ def test_IRI(): with pytest.raises(ValueError) as exc_info: iri = IRI(value) assert exc_info.value.args[0] == "Invalid IRI" + + +def test_link(): + from openminds.v4.controlled_terms import Species + from openminds.v4.core import DatasetVersion + + maybe_mouse = Link("https://openminds.om-i.org/instances/species/musMusculus") + + definitely_mouse = Link( + "https://openminds.om-i.org/instances/species/musMusculus", + allowed_types=[Species], + ) + + my_dsv1 = DatasetVersion(study_targets=[maybe_mouse]) + failures1 = my_dsv1.validate(ignore=["required"]) + assert len(failures1["type"]) == 1 + assert "study_targets" in failures1["type"][0] + + my_dsv2 = DatasetVersion(study_targets=[definitely_mouse]) + failures2 = my_dsv2.validate(ignore=["required"]) + assert len(failures2) == 0 + + expected = { + "@context": { + "@vocab": "https://openminds.om-i.org/props/", + }, + "@type": "https://openminds.om-i.org/types/DatasetVersion", + "studyTarget": [ + { + "@id": "https://openminds.om-i.org/instances/species/musMusculus", + }, + ], + } + assert my_dsv1.to_jsonld( + include_empty_properties=False, + embed_linked_nodes=False + ) == my_dsv2.to_jsonld( + include_empty_properties=False, + embed_linked_nodes=False + ) == expected