Skip to content

Commit fd5cfc7

Browse files
committed
Get the file from which a shape originates in the RDF model service from directory
1 parent 19cd8d6 commit fd5cfc7

7 files changed

Lines changed: 118 additions & 56 deletions

File tree

kgforge/core/archetypes/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ def mapping(self, entity: str, source: str, type: Callable) -> Mapping:
190190

191191
# Validation.
192192

193-
def schema_id(self, type: str) -> URIRef:
194-
# POLICY Should retrieve the schema id of the given type.
193+
def schema_source(self, type: str) -> str:
194+
# POLICY Should retrieve the schema source of the given type.
195195
not_supported()
196196

197197
def validate(self, data: Union[Resource, List[Resource]],

kgforge/specializations/models/rdf/rdf_model_service.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,24 @@
3333

3434

3535
class RdfModelService:
36-
shape_to_source: Dict[URIRef, str]
37-
class_to_shape: Dict[str, URIRef]
3836

39-
def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None:
37+
def __init__(
38+
self, graph: Graph,
39+
shape_to_source: Dict[URIRef, str],
40+
class_to_shape: Dict[str, URIRef],
41+
context_iri: Optional[str] = None,
42+
) -> None:
4043

4144
if context_iri is None:
4245
raise ConfigurationError("RdfModel requires a context")
4346
self._graph = graph
4447
self._context_cache = dict()
45-
self.shape_to_source, self.class_to_shape = self._build_shapes_map()
48+
self.shape_to_source = shape_to_source
49+
self.class_to_shape = class_to_shape
4650
self.context = Context(self.resolve_context(context_iri), context_iri)
4751
self.types_to_shapes: Dict[str, URIRef] = self._build_types_to_shapes()
4852

49-
def shape_source(self, schema_iri: URIRef) -> str:
53+
def get_shape_source(self, schema_iri: URIRef) -> str:
5054
return self.shape_to_source[schema_iri]
5155

5256
def sparql(self, query: str) -> List[Resource]:
@@ -100,11 +104,6 @@ def generate_context(self) -> Dict:
100104
"""Generates a JSON-LD context with the classes and terms present in the SHACL graph."""
101105
raise NotImplementedError()
102106

103-
@abstractmethod
104-
def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]:
105-
"""Queries the source and returns a map of owl:Class to sh:NodeShape"""
106-
raise NotImplementedError()
107-
108107
def _build_types_to_shapes(self) -> Dict[str, URIRef]:
109108
"""Iterates the classes_to_shapes dictionary to create a term to shape dictionary filtering
110109
the terms available in the context """

kgforge/specializations/models/rdf/rdf_model_service_from_directory.py

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,14 @@
2828
class RdfModelServiceFromDirectory(RdfModelService):
2929

3030
def __init__(self, dir_path: Path, context_iri: str) -> None:
31-
self._graph = load_rdf_files_into_graph(dir_path, Graph())
32-
self._shapes_graph = ShapesGraphWrapper(self._graph)
33-
super().__init__(self._graph, context_iri)
31+
32+
graph, shape_to_source, class_to_shape = self._build_shapes_map(dir_path=dir_path)
33+
self._shapes_graph = ShapesGraphWrapper(graph)
34+
35+
super().__init__(
36+
graph=graph, context_iri=context_iri, shape_to_source=shape_to_source,
37+
class_to_shape=class_to_shape
38+
)
3439

3540
def materialize(self, iri: URIRef) -> NodeProperties:
3641
sh = self._shapes_graph.lookup_shape_from_node(iri)
@@ -58,34 +63,53 @@ def resolve_context(self, iri: str) -> Dict:
5863
def generate_context(self) -> Dict:
5964
return self._generate_context()
6065

61-
def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]:
62-
query = """
63-
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
64-
PREFIX sh: <http://www.w3.org/ns/shacl#>
65-
SELECT ?type ?shape WHERE {
66-
{ ?shape sh:targetClass ?type .}
67-
UNION {
68-
SELECT (?shape as ?type) ?shape WHERE {
69-
?shape a sh:NodeShape .
70-
?shape a rdfs:Class
71-
}
72-
}
73-
} ORDER BY ?type
74-
"""
75-
res = self._graph.query(query)
76-
77-
class_to_shape: Dict[str, URIRef] = {
78-
row["type"]: URIRef(row["shape"])
79-
for row in res
80-
}
81-
82-
# FIXME should return the file path where the schema is in
83-
shape_to_file = dict(
84-
(e, "") # TODO file source
85-
for e in class_to_shape.values()
86-
)
66+
def _build_shapes_map(
67+
self, dir_path: Path
68+
) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]:
8769

88-
return shape_to_file, class_to_shape
70+
query = """
71+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
72+
PREFIX sh: <http://www.w3.org/ns/shacl#>
73+
SELECT ?type ?shape WHERE {
74+
{ ?shape sh:targetClass ?type .}
75+
UNION {
76+
SELECT (?shape as ?type) ?shape WHERE {
77+
?shape a sh:NodeShape .
78+
?shape a rdfs:Class
79+
}
80+
}
81+
} ORDER BY ?type
82+
"""
83+
84+
class_to_shape: Dict[str, URIRef] = dict()
85+
shape_to_file: Dict[URIRef, str] = dict()
86+
graph = Graph()
87+
88+
extensions = [".ttl", ".n3", ".json", ".rdf"]
89+
for f in dir_path.rglob(os.path.join("*.*")):
90+
graph_i = Graph()
91+
if f.suffix in extensions:
92+
file_format = guess_format(f.name)
93+
if file_format is None:
94+
file_format = "json-ld"
95+
graph_i.parse(f.as_posix(), format=file_format)
96+
97+
res = graph_i.query(query)
98+
99+
class_to_shape_i = dict(
100+
(row["type"], URIRef(row["shape"]))
101+
for row in res
102+
)
103+
class_to_shape.update(class_to_shape_i)
104+
105+
shape_to_file.update(dict(
106+
(e, f.as_posix())
107+
for e in class_to_shape_i.values()
108+
))
109+
110+
graph += graph_i
111+
112+
return graph, shape_to_file, class_to_shape
89113

90114

91115
def load_rdf_files_into_graph(path: Path, memory_graph: Graph) -> Graph:

kgforge/specializations/models/rdf/rdf_model_service_from_store.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,13 @@ def __init__(self, default_store: Store, context_iri: Optional[str] = None,
4646

4747
self._imported = []
4848

49-
g = Graph()
50-
self._shapes_graph = ShapesGraphWrapper(g)
51-
super().__init__(g, context_iri)
49+
graph, shape_to_resource, class_to_shape = self._build_shapes_map()
50+
self._shapes_graph = ShapesGraphWrapper(graph)
51+
52+
super().__init__(
53+
graph=graph, context_iri=context_iri, shape_to_source=shape_to_resource,
54+
class_to_shape=class_to_shape
55+
)
5256

5357
def materialize(self, iri: URIRef) -> NodeProperties:
5458
shape: ShapeWrapper = self._load_and_get_type_shape(iri)
@@ -75,7 +79,7 @@ def generate_context(self) -> Dict:
7579

7680
return self._generate_context()
7781

78-
def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]:
82+
def _build_shapes_map(self) -> Tuple[Graph, Dict[URIRef, str], Dict[str, URIRef]]:
7983
query = f"""
8084
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
8185
PREFIX sh: <http://www.w3.org/ns/shacl#>
@@ -95,8 +99,8 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]:
9599
limit = 100
96100
offset = 0
97101
count = limit
98-
class_to_shape = dict()
99-
shape_to_resource: Dict[URIRef, URIRef] = dict()
102+
class_to_shape: Dict[str, URIRef] = dict()
103+
shape_to_resource: Dict[URIRef, str] = dict()
100104

101105
while count == limit:
102106
resources = self.context_store.sparql(query, debug=False, limit=limit, offset=offset)
@@ -107,7 +111,7 @@ def _build_shapes_map(self) -> Tuple[Dict[URIRef, str], Dict[str, URIRef]]:
107111
count = len(resources)
108112
offset += count
109113

110-
return shape_to_resource, class_to_shape
114+
return Graph(), shape_to_resource, class_to_shape
111115

112116
def recursive_resolve(self, context: Union[Dict, List, str]) -> Dict:
113117
document = {}

kgforge/specializations/models/rdf_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ def get_shape_from_type(self, type: str) -> URIRef:
103103
raise ValueError(f"Type {type} not found")
104104
return self.service.types_to_shapes[type]
105105

106-
def schema_id(self, type: str) -> URIRef:
106+
def schema_source(self, type: str) -> str:
107107
shape_iri: URIRef = self.get_shape_from_type(type)
108-
return self.service.shape_source(shape_iri)
108+
return self.service.get_shape_source(shape_iri)
109109

110110
# Validation.
111111

tests/specializations/models/data.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# along with Blue Brain Nexus Forge. If not, see <https://choosealicense.com/licenses/lgpl-3.0/>.
1414
from copy import deepcopy
1515

16+
from utils import full_path_relative_to_root
1617

1718
ORGANIZATION = {
1819
"id": "",
@@ -134,4 +135,14 @@
134135
"Organization": "http://www.example.com/OrganizationShape",
135136
"Person": "http://www.example.com/PersonShape",
136137
"PostalAddress": "http://schema.org/PostalAddress",
137-
}
138+
}
139+
140+
SCHEMA_SOURCE_MAP = {
141+
"Activity": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-2.json'),
142+
"Association": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'),
143+
"Building": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-3.json'),
144+
"Employee": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'),
145+
"Organization": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'),
146+
"Person": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'),
147+
"PostalAddress": full_path_relative_to_root('tests/data/shacl-model/commons/shapes-1.json'),
148+
}

tests/specializations/models/test_rdf_model.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,35 @@ def valid_activity_resource(self, activity_json):
9999
return resource
100100

101101
@pytest.mark.parametrize("type_,", TYPES_SCHEMAS_MAP.keys())
102-
@pytest.mark.xfail
103102
def test_type_to_schema(self, rdf_model: RdfModel, type_):
104-
# FIXME TYPES_SCHEMAS_MAP should be a type to file dictionary
105-
# see _build_shapes_map from RdfModelServiceFromDirectory
106-
assert rdf_model.schema_id(type_) == URIRef(TYPES_SCHEMAS_MAP[type_])
103+
104+
# The problem:
105+
# For DirectoryService,
106+
# the best way to track the file from which a schema originates is
107+
# - before building the shapes map
108+
# - on service initialisation, when graph loading (gets all schemas),
109+
110+
# For StoreService,
111+
# the best way to track the file from which a schema originates is
112+
# - when building the shapes map, querying the store to get resource_id
113+
# - not on service initialisation, no graph loading
114+
# (empty graph provided, lazily loaded on request)
115+
116+
# Solution: graph loading should happen while building the shapes map. Shape loading returns:
117+
# the graph with the shapes (empty for Store, full for Directory)
118+
# shape_to_source
119+
# class_to_shape
120+
121+
# Would mean that in
122+
# - RdfModelServiceFromStore g = Graph() would happen in the implementation of
123+
# _build_shapes_map, and not in constructor
124+
# - RdfModelFromDirectory load_rdf_files_into_graph() would happen in the implementation of
125+
# _build_shapes_map, and not in constructor
126+
127+
# - RdfModelService: self.shape_to_source, self.class_to_shape can be parameters of the
128+
# constructor of this abstract class, and they'd be passed to super by the implementations
129+
130+
assert rdf_model.schema_source(type_) == SCHEMA_SOURCE_MAP[type_]
107131

108132
def test_validate_one(self, rdf_model: RdfModel, valid_activity_resource):
109133
rdf_model.validate(valid_activity_resource, False, type_="Activity")

0 commit comments

Comments
 (0)