Skip to content

Commit 725591b

Browse files
committed
Add Collection class
1 parent 4125c40 commit 725591b

5 files changed

Lines changed: 139 additions & 5 deletions

File tree

fairgraph/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .embedded import EmbeddedMetadata
2626
from .kgproxy import KGProxy
2727
from .kgquery import KGQuery
28+
from .collection import Collection
2829
from . import client, errors, openminds, utility
2930

3031
__version__ = "0.12.2"

fairgraph/client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,8 @@ def create_new_instance(
461461
"""
462462
if "'@id': None" in str(data):
463463
raise ValueError("payload contains undefined ids")
464+
if instance_id:
465+
UUID(instance_id)
464466
if self.migrated is False:
465467
data = deepcopy(data)
466468
adapt_namespaces_4to3(data)
@@ -488,6 +490,7 @@ def update_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocum
488490
instance_id (UUID): the instance's persistent identifier.
489491
data (dict): a JSON-LD document that modifies some or all of the data of the existing instance.
490492
"""
493+
UUID(instance_id)
491494
if self.migrated is False:
492495
data = deepcopy(data)
493496
adapt_namespaces_4to3(data)
@@ -507,6 +510,7 @@ def replace_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocu
507510
instance_id (UUID): the instance's persistent identifier.
508511
data (dict): a JSON-LD document that will replace the existing instance.
509512
"""
513+
UUID(instance_id)
510514
if self.migrated is False:
511515
data = deepcopy(data)
512516
adapt_namespaces_4to3(data)
@@ -522,6 +526,7 @@ def delete_instance(self, instance_id: str, ignore_not_found: bool = True, ignor
522526
"""
523527
Delete a KG instance.
524528
"""
529+
UUID(instance_id)
525530
response = self._kg_client.instances.delete(instance_id)
526531
# response is None if no errors
527532
if response: # error

fairgraph/collection.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
This module provides the Collection class, an extension to the openMINDS Collection
3+
that knows how to upload metadata to the KG.
4+
"""
5+
6+
# Copyright 2018-2024 CNRS
7+
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
11+
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
# See the License for the specific language governing permissions and
18+
# limitations under the License.
19+
20+
from importlib import import_module
21+
import os
22+
from time import sleep
23+
from uuid import uuid4
24+
from warnings import warn
25+
26+
from openminds import Collection as OMCollection
27+
from .utility import ActivityLog
28+
from .errors import AuthenticationError
29+
30+
31+
class Collection(OMCollection):
    """
    A collection of metadata nodes that can be saved to
    and loaded from disk, and uploaded to the KG.

    Args
    ----

    *nodes (LinkedMetadata):
        Nodes to store in the collection when creating it.
        Child nodes that are referenced from the explicitly
        listed nodes will also be added.
    """

    # File (relative to the current working directory) listing the ids of
    # nodes that have already been uploaded, one id per line. It lets an
    # interrupted upload be resumed without re-saving those nodes.
    _upload_log_path = ".kg_upload_log.txt"

    def load(self, *paths):
        """Load nodes from the given paths into this collection.

        `fairgraph.openminds` is imported before delegating to the parent
        class's `load()`.
        """
        # NOTE(review): presumably this import is needed so that the fairgraph
        # classes are available when the files are parsed - confirm.
        import_module("fairgraph.openminds")
        super().load(*paths)

    def upload(self, client, default_space=None, space_map=None, verbosity=0):
        """Save the nodes of this collection to the KG, one at a time.

        Nodes whose id starts with "https://openminds.om-i.org/instances" are
        not uploaded. Nodes whose id is listed in the upload log file
        (".kg_upload_log.txt" in the current directory) are skipped, and the id
        of every node that is saved successfully is appended to that file.

        Args
        ----

        client:
            Passed as first argument to each node's `save()` method.
        default_space:
            Space used for nodes whose class is not found in `space_map`.
        space_map (dict):
            Optional mapping from node class to the space in which instances
            of that class should be saved.
        verbosity (int):
            0 for no output, 1 to show a progress bar (requires tqdm),
            2 to print one line per node saved.

        Returns
        -------

        The ActivityLog object that was passed to every `save()` call.

        Raises
        ------

        Any exception raised by `save()`, other than AuthenticationError
        (which is printed and stops the upload) and errors whose message
        contains "500" (which trigger a single retry after a pause).
        """
        # TODO: consider refusing to upload collections that still contain
        #       local ids, or generating KG ids for them first.
        nodes_to_save = [
            node
            for node in self.sort_nodes_for_upload()
            if not node.id.startswith("https://openminds.om-i.org/instances")
        ]
        # computed before any tqdm wrapping, used for the percentage display
        total = len(nodes_to_save)
        activity_log = ActivityLog()

        if verbosity == 1:
            try:
                tqdm = import_module("tqdm")
            except ImportError:
                warn("Unable to show progress bar, please install tqdm")
            else:
                nodes_to_save = tqdm.tqdm(nodes_to_save)

        already_uploaded = self._read_upload_log()

        for i, node in enumerate(nodes_to_save):
            if node.id in already_uploaded:
                continue
            if verbosity == 2:
                print(f"[{100*i//total}%] Saving {node.__class__.__name__} {node.id}")
            if space_map:
                target_space = space_map.get(node.__class__, default_space)
            else:
                target_space = default_space
            # remember the id as it was before saving, since this is the value
            # found in the collection the next time it is loaded
            # NOTE(review): presumably save() may replace a local id - confirm
            original_node_id = node.id
            try:
                self._save_with_retry(node, client, target_space, activity_log)
            except AuthenticationError as err:
                # TODO: consider refreshing the client's token and carrying on
                print(err)
                break
            # Reached whether the first attempt or the retry succeeded, so a
            # node saved on the second attempt is also recorded as done.
            with open(self._upload_log_path, "a", encoding="utf-8") as fp:
                fp.write(f"{original_node_id}\n")

        return activity_log

    def _read_upload_log(self):
        """Return the set of node ids listed in the upload log (empty if there is no log)."""
        if not os.path.exists(self._upload_log_path):
            return set()
        with open(self._upload_log_path, encoding="utf-8") as fp:
            return {line.strip() for line in fp if line.strip()}

    def _save_with_retry(self, node, client, space, activity_log, retry_delay=5):
        """Save a single node, retrying once if the error message contains "500"."""
        save_options = {
            "space": space,
            "recursive": False,
            "ignore_duplicates": True,
            "activity_log": activity_log,
        }
        try:
            node.save(client, **save_options)
        except AuthenticationError:
            # never retried: the caller stops the upload
            raise
        except Exception as err:
            if "500" not in str(err):
                raise
            # NOTE(review): "500" presumably indicates a transient server
            # error - confirm. Wait a little, then try exactly once more.
            sleep(retry_delay)
            node.save(client, **save_options)

fairgraph/kgobject.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,14 @@ def __init__(
8989

9090
self._raw_remote_data = None
9191
self.remote_data = {}
92-
if self.id:
92+
if self.id and self.id.startswith("http"):
9393
# we store the original remote data in `_raw_remote_data`
9494
# and a normalized version in `remote_data`
9595
self._raw_remote_data = data # for debugging
9696
if data:
9797
self.remote_data = normalize_data(
98-
self.to_jsonld(include_empty_properties=True, embed_linked_nodes=False), self.context
98+
self.to_jsonld(include_empty_properties=True, embed_linked_nodes=False),
99+
data.get("@context", self.context)
99100
)
100101

101102
def __repr__(self):
@@ -352,7 +353,8 @@ def from_alias(
352353
def uuid(self) -> Union[str, None]:
353354
# todo: consider using client._kg_client.uuid_from_absolute_id
354355
if self.id is not None:
355-
return self.id.split("/")[-1]
356+
value = self.id.split("/")[-1]
357+
return str(UUID(value))
356358
else:
357359
return None
358360

@@ -548,7 +550,7 @@ def diff(self, other):
548550
def exists(self, client: KGClient, ignore_duplicates: bool = False, in_spaces: Optional[List[str]] = None) -> bool:
549551
"""Check if this object already exists in the KnowledgeGraph"""
550552

551-
if self.id:
553+
if self.id and self.id.startswith("http"):
552554
# Since the KG now allows user-specified IDs we can't assume that the presence of
553555
# an id means the object exists
554556
data = client.instance_from_full_uri(
@@ -601,6 +603,7 @@ def exists(self, client: KGClient, ignore_duplicates: bool = False, in_spaces: O
601603

602604
if instances:
603605
if len(instances) > 1 and not ignore_duplicates:
606+
# we might want to consider running a second query with "equals" rather than "contains"
604607
raise Exception(
605608
f"Existence query is not specific enough. Type: {self.__class__.__name__}; filters: {query_filter}"
606609
)
@@ -792,9 +795,13 @@ def save(
792795
# create new
793796
local_data = normalize_data(self.to_jsonld(embed_linked_nodes=False), self.context)
794797
logger.info(" - creating instance with data {}".format(local_data))
798+
if self.id and self.id.startswith("http"):
799+
instance_id = self.uuid
800+
else:
801+
instance_id = None
795802
try:
796803
instance_data = client.create_new_instance(
797-
local_data, space or self.__class__.default_space, instance_id=self.uuid
804+
local_data, space or self.__class__.default_space, instance_id=instance_id
798805
)
799806
except (AuthorizationError, ResourceExistsError) as err:
800807
if ignore_auth_errors:

fairgraph/utility.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ def normalize_data(data: Union[None, JSONdict], context: Dict[str, Any]) -> Unio
220220
pass
221221
elif value is None:
222222
pass
223+
elif expanded_key == "@id":
224+
if value.startswith("http"):
225+
# do not take local ids, e.g., those starting with "_"
226+
normalized[expanded_key] = value
223227
elif expanded_key == "@type":
224228
normalized[expanded_key] = value
225229
elif isinstance(value, (list, tuple)):
@@ -327,6 +331,15 @@ def __init__(
327331
def __repr__(self):
328332
return f"{self.type}: {self.cls}({self.id}) in '{self.space}'"
329333

334+
def as_dict(self):
    """Return the fields of this entry as a plain dictionary.

    The keys are "cls", "id", "delta", "space" and "type_", in that order;
    "type_" holds the value of the `type` attribute.
    """
    # NOTE(review): the trailing underscore in "type_" presumably matches the
    # name of the constructor argument - confirm.
    fields = dict(cls=self.cls, id=self.id, delta=self.delta, space=self.space)
    fields["type_"] = self.type
    return fields
342+
330343

331344
class ActivityLog:
332345
"""

0 commit comments

Comments
 (0)