Skip to content

Commit bfc62b1

Browse files
committed
Separate triple and dataset conversion from legacy
1 parent 0f2d5a9 commit bfc62b1

1 file changed

Lines changed: 50 additions & 37 deletions

File tree

lib/pyld/util.py

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,47 +61,60 @@ def from_legacy_dataset(dataset: dict) -> Dataset:
6161
for graph_name, triples in dataset.items():
6262
# Handle graph name
6363
try:
64-
if graph_name == '@default':
65-
g = ds.default_graph
66-
elif graph_name.startswith('_:'):
67-
# Check if graph name is a blank node or IRI
68-
g = ds.graph(BNode(graph_name[2:]))
69-
else:
70-
g = ds.graph(URIRef(graph_name))
64+
g = from_legacy_graph(graph_name, ds.default_graph)
7165
except Exception as err:
7266
raise ValueError(f'Illegal graph name: {graph_name}') from err
7367

7468
for t in triples:
75-
if not all(k in t for k in ('subject', 'predicate', 'object')):
76-
raise ValueError(f'Illegal quad structure: {t}')
77-
78-
def to_node(comp):
79-
if not isinstance(comp, dict) or 'type' not in comp or 'value' not in comp:
80-
raise ValueError(f'Illegal quad structure: {comp}')
81-
82-
val = comp['value']
83-
if comp['type'] == 'blank node':
84-
# Strip '_:' because RDFLib adds it back internally
85-
return BNode(val[2:] if val.startswith('_:') else val)
86-
elif comp['type'] == 'IRI':
87-
return URIRef(val)
88-
elif comp['type'] == 'literal':
89-
return Literal(
90-
val,
91-
lang=comp.get('language'),
92-
datatype=URIRef(comp['datatype'])
93-
if comp.get('datatype') and not comp.get('language')
94-
else None,
95-
# Don't normalize literal values to prevent datetime issues
96-
# TODO: this means only rdflib.Dataset() created with normalization turned off will work properly.
97-
normalize=False,
98-
)
99-
raise ValueError('Illegal component type {}'.format(comp['type']))
100-
101-
s = to_node(t['subject'])
102-
p = to_node(t['predicate'])
103-
o = to_node(t['object'])
104-
69+
s, p, o = from_legacy_triple(t)
10570
ds.add((s, p, o, g))
10671

10772
return ds
73+
74+
def from_legacy_graph(graph: str, default_graph = DATASET_DEFAULT_GRAPH_ID) -> URIRef | BNode:
    """
    Translate a legacy graph name into the matching rdflib graph identifier.

    The name '@default' yields ``default_graph`` unchanged. A name starting
    with '_:' yields a BNode built from the rest of the name (prefix removed).
    Any other name is wrapped in a URIRef.
    """
    # The default graph is passed through exactly as the caller supplied it.
    if graph == '@default':
        return default_graph

    # A '_:' prefix marks a blank node label; everything else is used as an IRI.
    is_blank_node = graph.startswith('_:')
    return BNode(graph[2:]) if is_blank_node else URIRef(graph)
85+
86+
def from_legacy_triple(triple: dict, normalize=False) -> tuple:
    """
    Build an rdflib (subject, predicate, object) tuple from a legacy triple dict.

    ``triple`` must contain the keys 'subject', 'predicate' and 'object'. Each
    of them must be a dict with a 'type' ('blank node', 'IRI' or 'literal') and
    a 'value'. Literal components may also carry 'language' and 'datatype';
    the datatype is only applied when no language is set.

    ``normalize`` is forwarded to the Literal constructor.

    Raises ValueError when a required key is missing, a component is malformed,
    or a component has an unknown type.
    """
    positions = ('subject', 'predicate', 'object')
    for position in positions:
        if position not in triple:
            raise ValueError(f'Illegal quad structure: {triple}')

    def convert(component):
        # A component must be a dict carrying both a type and a value.
        well_formed = (
            isinstance(component, dict)
            and 'type' in component
            and 'value' in component
        )
        if not well_formed:
            raise ValueError(f'Illegal quad structure: {component}')

        kind = component['type']
        value = component['value']

        if kind == 'blank node':
            # Strip '_:' because RDFLib adds it back internally
            if value.startswith('_:'):
                value = value[2:]
            return BNode(value)

        if kind == 'IRI':
            return URIRef(value)

        if kind == 'literal':
            language = component.get('language')
            # A datatype is only used when the literal has no language tag.
            datatype = None
            if component.get('datatype') and not language:
                datatype = URIRef(component['datatype'])
            # By default literal values are not normalized, to prevent datetime issues.
            # TODO: this means only rdflib.Dataset() created with normalization turned off will work properly.
            return Literal(
                value,
                lang=language,
                datatype=datatype,
                normalize=normalize,
            )

        raise ValueError('Illegal component type {}'.format(kind))

    return tuple(convert(triple[position]) for position in positions)

0 commit comments

Comments
 (0)