Skip to content

Commit 985b49e

Browse files
Keywords with context input (#513)
# Description Added `context` argument to get_keywords(). Some functionality that should have been in PR #513 has leaked into this PR. ## Type of change - [ ] Bug fix and code cleanup - [ ] New feature - [ ] Documentation update - [ ] Testing ## Checklist for the reviewer This checklist should be used as a help for the reviewer. - [ ] Is the change limited to one issue? - [ ] Does this PR close the issue? - [ ] Is the code easy to read and understand? - [ ] Do all new features have an accompanying new test? - [ ] Has the documentation been updated as necessary? - [ ] Is the code properly tested? --------- Co-authored-by: Francesca.L.Bleken@sintef.no <francesca.l.bleken@sintef.no> Co-authored-by: Francesca L. Bleken <48128015+francescalb@users.noreply.github.com>
1 parent b793b49 commit 985b49e

11 files changed

Lines changed: 401 additions & 97 deletions

File tree

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2022-2025 SINTEF
3+
Copyright (c) 2022-2026 SINTEF
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,12 @@ addopts = """-rs --cov=tripper --cov-report=term \
144144
"""
145145
filterwarnings = [
146146
"ignore:.*imp module.*:DeprecationWarning",
147+
"ignore:ConjunctiveGraph.*:DeprecationWarning", # in pyld
148+
"ignore:builtin type SwigPy.*:DeprecationWarning", # in pyld
147149
"ignore:::tripper.literal:243", # Ignore warning in doctest
148150
]
149151

152+
150153
[tool.setuptools.package-data]
151154
"tripper.context" = ["*.json", "*.yaml"]
152155

tests/datadoc/test_context.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,33 @@ def test_get_prefixes():
9494
assert "mediaType" not in prefixes
9595

9696

97+
def test_get_properties():
98+
"""Test get_properties() method."""
99+
properties = ctx.get_properties()
100+
assert "adms" not in properties # prefix is not a property
101+
assert "Document" not in properties # class is not a property
102+
assert properties["mediaType"] == "http://www.w3.org/ns/dcat#mediaType"
103+
104+
105+
def test_get_object_properties():
106+
"""Test get_object_properties() method."""
107+
from tripper import DCTERMS
108+
109+
objprop = ctx.get_object_properties()
110+
assert "adms" not in objprop # prefix is not an object property
111+
assert "Document" not in objprop # class is not an object property
112+
assert "title" not in objprop # annotation is not an object property
113+
assert objprop["hasPart"] == DCTERMS.hasPart
114+
115+
116+
def test_get_classes():
117+
"""Test get_classes() method."""
118+
classes = ctx.get_classes()
119+
assert "adms" not in classes
120+
assert "mediaType" not in classes
121+
assert classes["Document"] == "http://xmlns.com/foaf/0.1/Document"
122+
123+
97124
def test_sync_prefixes():
98125
"""Test sync_prefixes() method."""
99126
from tripper import Triplestore

tests/datadoc/test_datadoc_utils.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,32 @@ def test_iriname():
112112
assert iriname("abc") == "abc"
113113
assert iriname("rdf:JSON") == "JSON"
114114
assert iriname("https://w3id.org/emmo#Ampere") == "Ampere"
115+
116+
117+
def test_getlabel():
118+
"""Test utility function getlabel()."""
119+
from tripper import SKOS
120+
from tripper.datadoc.errors import InvalidDatadocError
121+
from tripper.datadoc.utils import getlabel
122+
123+
assert getlabel({"@id": "ex:A", "prefLabel": "a"}) == "a"
124+
assert getlabel({"@id": "ex:A", "label": "a"}) == "a"
125+
assert getlabel({"@id": "ex:A", "rdfs:label": "a"}) == "a"
126+
assert getlabel({"@id": "ex:A"}, default="a") == "a"
127+
assert getlabel({"@id": "ex:A"}) == "A"
128+
129+
# Check for precedence of labels
130+
assert (
131+
getlabel({"@id": "ex:A", "rdfs:label": "a", "prefLabel": "b"}) == "a"
132+
)
133+
assert (
134+
getlabel({"@id": "ex:A", "rdfs:label": "a", "skos:prefLabel": "b"})
135+
== "b"
136+
)
137+
assert (
138+
getlabel({"@id": "ex:A", "rdfs:label": "a", SKOS.prefLabel: "b"})
139+
== "b"
140+
)
141+
142+
with pytest.raises(InvalidDatadocError):
143+
getlabel({"x": "ex:A"})

tests/datadoc/test_dataset.py

Lines changed: 102 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,48 @@ def test_store():
402402
}
403403

404404

405+
def test_update_context():
406+
"""Test update_context()."""
407+
from tripper import HUME, OWL, Namespace
408+
from tripper.datadoc import get_context
409+
from tripper.datadoc.dataset import update_context
410+
411+
EX = Namespace("http://example.com/")
412+
sources = {
413+
"@context": {
414+
"ex": str(EX),
415+
"hume": str(HUME),
416+
},
417+
"@graph": [
418+
{
419+
# Instances are not added to context
420+
"@id": "ex:instr",
421+
"@type": "hume:Device",
422+
},
423+
{
424+
# Not added to context, since there is no @type
425+
"@id": "ex:instr2",
426+
},
427+
{
428+
"@id": "ex:MyDevice",
429+
"skos:prefLabel": "MyDevice",
430+
"subClassOf": "hume:Device",
431+
},
432+
],
433+
}
434+
context = get_context(default_theme=None)
435+
update_context(sources, context)
436+
c = context.get_context_dict()
437+
assert "instr" not in c
438+
assert "instr2" not in c
439+
assert "MyDevice" in c
440+
assert c["MyDevice"] == {"@id": EX.MyDevice, "@type": OWL.Class}
441+
assert c["Device"] == {"@id": HUME.Device, "@type": OWL.Class}
442+
443+
# TODO: add tests for what happens if there is mismatch between
444+
# previously added context and updated_context...
445+
446+
405447
def test_infer_restriction_types():
406448
"""Test infer_restriction_types()."""
407449
from tripper import DCTERMS, HUME, RDFS, Namespace
@@ -426,7 +468,7 @@ def test_infer_restriction_types():
426468
"http://example.org#A": {
427469
DCTERMS.creator: "some",
428470
DCTERMS.hasPart: "value",
429-
DCTERMS.issued: "value",
471+
# DCTERMS.issued: "value",
430472
}
431473
}
432474

@@ -452,7 +494,7 @@ def test_infer_restriction_types():
452494
"@id": "ex:MyDevice",
453495
# "@type": "owl:Class",
454496
"subClassOf": HUME.Device,
455-
"hasPart": HUME.MeasuringInstrument,
497+
"hasPart": [HUME.MeasuringInstrument, "ex:MyDevice"],
456498
},
457499
],
458500
}
@@ -577,6 +619,13 @@ def test_update_restrictions():
577619
"@type": HUME.Device,
578620
"isDefinedBy": HUME.MeasuringInstrument,
579621
},
622+
{
623+
# An individual relating to two classes and an individual.
624+
# Should be converted to an existential restriction.
625+
"@id": "ex:instr3",
626+
"@type": HUME.Device,
627+
"hasPart": [HUME.MeasuringInstrument, "MyDevice", "ex:instr"],
628+
},
580629
{
581630
# A class relating to a class.
582631
# Should be converted to an existential restriction.
@@ -586,63 +635,68 @@ def test_update_restrictions():
586635
"@id": "ex:MyDevice",
587636
# "@type": "owl:Class",
588637
"subClassOf": HUME.Device,
638+
"label": "MyDevice",
589639
"hasPart": HUME.MeasuringInstrument,
590640
},
641+
{
642+
# A class relating to two classes
643+
"@id": "ex:MyDevice2",
644+
"@type": "owl:Class",
645+
"subClassOf": HUME.Device,
646+
"label": "MyDevice2",
647+
"hasPart": [HUME.MeasuringInstrument, "MyDevice"],
648+
},
649+
# TODO: for completeness, add tests for individual
650+
# relating to one individual and individual related to a
651+
# list of individuals
591652
],
592653
}
593654
r6 = deepcopy(d6)
594655
update_restrictions(r6, ctx)
595-
assert r6 == {
596-
"@context": {
597-
"MeasuringInstrument": {
598-
"@id": "https://w3id.org/emmo/hume#MeasuringInstrument",
599-
"@type": "owl:Class",
600-
}
601-
},
602-
"@graph": [
603-
{
604-
"@id": "ex:instr",
605-
"@type": "https://w3id.org/emmo/hume#Device",
606-
"isDefinedBy": "https://w3id.org/emmo/hume#MeasuringSystem",
607-
},
656+
res6 = {d["@id"]: d for d in r6["@graph"]}
657+
assert res6["ex:instr"] == {
658+
"@id": "ex:instr",
659+
"@type": "https://w3id.org/emmo/hume#Device",
660+
"isDefinedBy": "https://w3id.org/emmo/hume#MeasuringSystem",
661+
}
662+
assert res6["ex:instr2"] == {
663+
"@id": "ex:instr2",
664+
"@type": [
665+
"https://w3id.org/emmo/hume#Device",
608666
{
609-
"@id": "ex:instr2",
610-
"@type": [
611-
"https://w3id.org/emmo/hume#Device",
612-
{
613-
"@type": "owl:Restriction",
614-
"owl:onProperty": {
615-
"@id": (
616-
"http://www.w3.org/2000/01/rdf-schema#"
617-
"isDefinedBy"
618-
)
619-
},
620-
"owl:someValuesFrom": {
621-
"@id": (
622-
"https://w3id.org/emmo/hume#MeasuringInstrument"
623-
)
624-
},
625-
},
626-
],
667+
"@type": "owl:Restriction",
668+
"owl:onProperty": {
669+
"@id": "http://www.w3.org/2000/01/rdf-schema#isDefinedBy",
670+
},
671+
"owl:someValuesFrom": {
672+
"@id": "https://w3id.org/emmo/hume#MeasuringInstrument",
673+
},
627674
},
675+
],
676+
}
677+
assert res6["ex:instr3"] == {
678+
# FIXME: known limitation - the list-valued "hasPart" is currently left unchanged, but should be converted to restrictions
679+
"@id": "ex:instr3",
680+
"@type": "https://w3id.org/emmo/hume#Device",
681+
"hasPart": [
682+
"https://w3id.org/emmo/hume#MeasuringInstrument",
683+
"MyDevice",
684+
"ex:instr",
685+
],
686+
}
687+
assert res6["ex:MyDevice"] == {
688+
"@id": "ex:MyDevice",
689+
"subClassOf": [
690+
"https://w3id.org/emmo/hume#Device",
628691
{
629-
"@id": "ex:MyDevice",
630-
"subClassOf": [
631-
"https://w3id.org/emmo/hume#Device",
632-
{
633-
"@type": "owl:Restriction",
634-
"owl:onProperty": {
635-
"@id": "http://purl.org/dc/terms/hasPart"
636-
},
637-
"owl:someValuesFrom": {
638-
"@id": (
639-
"https://w3id.org/emmo/hume#MeasuringInstrument"
640-
)
641-
},
642-
},
643-
],
692+
"@type": "owl:Restriction",
693+
"owl:onProperty": {"@id": "http://purl.org/dc/terms/hasPart"},
694+
"owl:someValuesFrom": {
695+
"@id": "https://w3id.org/emmo/hume#MeasuringInstrument"
696+
},
644697
},
645698
],
699+
"label": "MyDevice",
646700
}
647701

648702

tests/datadoc/test_keywords.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Test the Keywords class."""
22

3+
# pylint: disable=too-many-statements,wrong-import-position
4+
35
import pytest
46

57
pytest.importorskip("yaml")
68
pytest.importorskip("pyld")
79

8-
# pylint: disable=wrong-import-position
910
from tripper.datadoc import Keywords
1011

1112
# A fixture used by all the tests
@@ -14,10 +15,13 @@
1415

1516
def test_get_keywords():
1617
"""Test get_keywords() function."""
18+
import warnings
19+
1720
from dataset_paths import testdir # pylint: disable=import-error
1821

19-
from tripper import DDOC
20-
from tripper.datadoc import get_keywords
22+
from tripper import DDOC, OWL, XSD
23+
from tripper.datadoc import get_context, get_keywords
24+
from tripper.errors import TripperWarning
2125

2226
kw1 = get_keywords()
2327
assert kw1.data == keywords.data
@@ -65,6 +69,38 @@ def test_get_keywords():
6569
assert kw6.data.theme == ["ddoc:datadoc", "ddoc:prefixes", "ddoc:process"]
6670
assert "batchNumber" in kw6
6771

72+
kw7 = get_keywords(theme=None)
73+
assert len(kw7) == 0
74+
kw7.add({"resources": {"MyClass": {"iri": "http://example.com/MyClass"}}})
75+
assert len(kw7) == 0 # no properties in keywords
76+
77+
ctx = get_context(default_theme=None)
78+
ctx.add_context(
79+
{
80+
"ex": "http://example.com/",
81+
"owl": str(OWL),
82+
"xsd": str(XSD),
83+
"objprop": {"@id": "ex:objprop", "@type": "@id"},
84+
"dataprop": {"@id": "ex:dataprop", "@type": "xsd:string"},
85+
"cls": {"@id": "ex:cls", "@type": "owl:Class"},
86+
}
87+
)
88+
89+
# Test `context` argument to get_keywords(). Ignore expected
90+
# warnings about loss of information
91+
with warnings.catch_warnings():
92+
warnings.simplefilter("ignore", category=TripperWarning)
93+
94+
kw8 = get_keywords(kw7, context=ctx, theme=None)
95+
assert len(kw8) == 2 # 2 properties in keywords
96+
assert kw8.get_prefixes()["ex"] == "http://example.com/"
97+
assert set(kw8.classnames()) == {"Resource", "MyClass", "cls"}
98+
99+
kw9 = get_keywords(context=ctx, theme=None)
100+
assert len(kw9) == 2
101+
assert kw9.get_prefixes()["ex"] == "http://example.com/"
102+
assert set(kw9.classnames()) == {"Resource", "cls"}
103+
68104

69105
def test_iter():
70106
"""Test __iter__() method."""
@@ -155,7 +191,11 @@ def test_load_yaml():
155191
"""
156192
from dataset_paths import indir # pylint: disable=import-error
157193

158-
from tripper.datadoc.errors import ParseError
194+
from tripper.datadoc.errors import (
195+
ParseError,
196+
RedefineKeywordWarning,
197+
SkipRedefineKeywordWarning,
198+
)
159199

160200
kw = keywords.copy()
161201

@@ -195,10 +235,12 @@ def test_load_yaml():
195235
# keywords are unchanged by failures
196236
# assert kw == keywords
197237

198-
kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="skip")
238+
with pytest.warns(SkipRedefineKeywordWarning):
239+
kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="skip")
199240
assert kw["title"].iri == "dcterms:title"
200241

201-
kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="allow")
242+
with pytest.warns(RedefineKeywordWarning):
243+
kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="allow")
202244
assert kw["title"].iri == "myonto:a"
203245

204246
kw.load_yaml(indir / "valid_keywords.yaml")
@@ -485,6 +527,7 @@ def test_load2():
485527

486528
from tripper import Triplestore
487529
from tripper.datadoc import get_keywords
530+
from tripper.datadoc.errors import RedefineKeywordWarning
488531
from tripper.utils import AttrDict
489532

490533
ts = Triplestore("rdflib")
@@ -539,7 +582,8 @@ def test_load2():
539582
# Create a new Keywords object with
540583
# default keywords and load from the triplestore
541584
kw2 = get_keywords()
542-
kw2.load_rdf(ts, redefine="allow")
585+
with pytest.warns(RedefineKeywordWarning):
586+
kw2.load_rdf(ts, redefine="allow")
543587

544588
# Ensure that the specified keywords are in kw2
545589
assert not {

0 commit comments

Comments
 (0)