Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
fa8707b
Add stub for RDFC-1.0 and prepare tests.
mielvds Feb 5, 2026
3343dc5
Add rdf-canon to submodules
mielvds Feb 16, 2026
8f14f5d
Add rdflib to dependencies
mielvds Apr 15, 2026
043fd1f
Re-enable canon tests
mielvds Apr 15, 2026
e6cb90f
Introduce rdflib into canon.py and replace internal model.
mielvds Apr 15, 2026
7ad1aae
Don't normalize literals
mielvds Apr 16, 2026
d001121
Fixes positions and does small touchups
mielvds Apr 20, 2026
7f52725
Replace permutations with stdlib
mielvds Apr 20, 2026
03a08c6
Fix identifier ordering
mielvds Apr 20, 2026
52bc07c
Switch from NT to NQ serialization
mielvds Apr 20, 2026
7cad9c8
Remove legacy nquads parsing dependency by moving parsing to canon.py
mielvds Apr 21, 2026
af4bfae
Move triple data structure convert functions to util.py
mielvds Apr 21, 2026
bb58090
Cleanup of old canon code
mielvds Apr 21, 2026
e4d4da3
Add tests for conversion methods in util.py and do fixes
mielvds Apr 21, 2026
67f4db7
Make NQ serialization part of class & add override for RDFC1.0
mielvds Apr 22, 2026
2c56eb4
Fix RDFC1.0 literal encoding
mielvds Apr 22, 2026
01dafce
Add configurable hashAlgorithm
mielvds Apr 22, 2026
9308065
Add option to return the bnode map.
mielvds Apr 22, 2026
c1f56d3
Rename _main to _canonicalize, add docstring and move function
mielvds Apr 22, 2026
999730d
Minimize change
mielvds Apr 23, 2026
3178293
Make bnode map merge from parser simpler and more robust
mielvds Apr 27, 2026
0f2d5a9
Make linter happy
mielvds Apr 27, 2026
bfc62b1
Separate triple and dataset conversion from legacy
mielvds Apr 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "specifications/normalization"]
path = specifications/normalization
url = https://github.com/json-ld/normalization.git
[submodule "specifications/rdf-canon"]
path = specifications/rdf-canon
url = https://github.com/w3c/rdf-canon.git
350 changes: 221 additions & 129 deletions lib/pyld/canon.py

Large diffs are not rendered by default.

54 changes: 29 additions & 25 deletions lib/pyld/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from c14n.Canonicalize import canonicalize
from pyld.__about__ import __copyright__, __license__, __version__
from pyld.canon import URDNA2015, URGNA2012, UnknownFormatError
from pyld.canon import RDFC10, URDNA2015, URGNA2012, UnknownFormatError
from pyld.identifier_issuer import IdentifierIssuer
from pyld.nquads import ParserError, parse_nquads, serialize_nquad, serialize_nquads

Expand Down Expand Up @@ -286,8 +286,8 @@ def normalize(input_, options=None):

:param input_: the JSON-LD input to normalize.
:param [options]: the options to use.
[algorithm] the algorithm to use: `URDNA2015` or `URGNA2012`
(default: `URGNA2012`).
[algorithm] the algorithm to use: `RDFC10`, `URDNA2015` or `URGNA2012`
(default: `RDFC10`).
[base] the base IRI to use.
[inputFormat] the format if input is not JSON-LD:
'application/n-quads' for N-Quads.
Expand Down Expand Up @@ -925,8 +925,10 @@ def normalize(self, input_, options):

:param input_: the JSON-LD input to normalize.
:param options: the options to use.
[algorithm] the algorithm to use: `URDNA2015` or `URGNA2012`
(default: `URGNA2012`).
[algorithm] the algorithm to use: `RDFC10`, `URDNA2015` or `URGNA2012`
(default: `RDFC10`).
[hashAlgorithm] the hashing algorithm to use; only applicable to `RDFC10`
(default: `SHA256`).
[base] the base IRI to use.
[contextResolver] internal use only.
[inputFormat] the format if input is not JSON-LD:
Expand All @@ -935,12 +937,15 @@ def normalize(self, input_, options):
'application/n-quads' for N-Quads.
[documentLoader(url, options)] the document loader
(default: _default_document_loader).
[outputMap] if True, the function will return a map of blank node
identifiers to their normalized identifiers instead of the
normalized dataset (default: False).

:return: the normalized output.
:return: the normalized output or the map of blank node identifiers.
"""
# set default options
options = options.copy() if options else {}
options.setdefault('algorithm', 'URGNA2012')
options.setdefault('algorithm', 'RDFC10')
options.setdefault('base', input_ if _is_string(input_) else '')
options.setdefault('documentLoader', _default_document_loader)
options.setdefault(
Expand All @@ -950,21 +955,14 @@ def normalize(self, input_, options):
options.setdefault('extractAllScripts', True)
options.setdefault('processingMode', 'json-ld-1.1')

if options['algorithm'] not in ['URDNA2015', 'URGNA2012']:
if options['algorithm'] not in ['RDFC10', 'URDNA2015', 'URGNA2012']:
raise JsonLdError(
'Unsupported normalization algorithm.', 'jsonld.NormalizeError'
)

try:
if 'inputFormat' in options:
if (
options['inputFormat'] != 'application/n-quads'
and options['inputFormat'] != 'application/nquads'
):
raise JsonLdError(
'Unknown normalization input format.', 'jsonld.NormalizeError'
)
dataset = JsonLdProcessor.parse_nquads(input_)
dataset = input_
else:
# convert to RDF dataset then do normalization
opts = dict(options)
Expand All @@ -979,16 +977,22 @@ def normalize(self, input_, options):
) from cause

# do normalization
if options['algorithm'] == 'URDNA2015':
try:
return URDNA2015().main(dataset, options)
except UnknownFormatError as cause:
raise JsonLdError(
str(cause), 'jsonld.UnknownFormat', {'format': cause.format}
) from cause

if options['algorithm'] == 'RDFC10':
algorithm = RDFC10(hash_algorithm = options.get('hashAlgorithm'))
elif options['algorithm'] == 'URDNA2015':
algorithm = URDNA2015()
# assume URGNA2012
return URGNA2012().main(dataset, options)
else:
algorithm = URGNA2012()

try:
return algorithm.main(dataset, options)
except UnknownFormatError as cause:
raise JsonLdError(
str(cause),
'jsonld.UnknownFormat',
{'format': cause.format}) from cause


def from_rdf(self, dataset, options):
"""
Expand Down
120 changes: 120 additions & 0 deletions lib/pyld/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from rdflib import BNode, Dataset, Literal, URIRef
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID


# Helpers for converting between rdflib.Dataset and legacy dict structure used in pyld
def to_legacy_dataset(dataset: Dataset) -> dict:
    """
    Transform an rdflib.Dataset into the RDF.js-style dictionary structure,
    ensuring blank node values start with '_:'.

    :param dataset: the rdflib.Dataset to convert.

    :return: a dict mapping graph names ('@default' for the default graph,
        '_:'-prefixed for blank-node graphs, plain IRIs otherwise) to lists
        of legacy quads of the form
        {'subject': ..., 'predicate': ..., 'object': ...}.

    :raises ValueError: if a term of an unsupported type is encountered.
    """
    # Hoisted out of the quad loop: defining this closure per-iteration
    # (as a nested def inside the for body) rebuilt the function object
    # for every quad.
    def _term_to_dict(node):
        # Convert a single rdflib term into its legacy dict representation.
        if isinstance(node, BNode):
            # Ensure the value starts with '_:' as legacy consumers expect.
            val = str(node)
            if not val.startswith('_:'):
                val = f"_:{val}"
            return {'type': 'blank node', 'value': val}
        if isinstance(node, URIRef):
            return {'type': 'IRI', 'value': str(node)}
        if isinstance(node, Literal):
            res = {'type': 'literal', 'value': str(node)}
            if node.language:
                res['language'] = node.language
            if node.datatype:
                res['datatype'] = str(node.datatype)
            return res
        raise ValueError(f'Illegal node type {type(node)}')

    compat_dataset = {'@default': []}

    for s, p, o, g in dataset.quads((None, None, None, None)):
        # Determine the graph key; blank-node graph names get a '_:' prefix.
        graph_id = '@default'
        if g is not None and g != DATASET_DEFAULT_GRAPH_ID:
            graph_id = f"_:{str(g)}" if isinstance(g, BNode) else str(g)

        # Build the legacy quad under its graph key, creating the list on
        # first use.
        compat_dataset.setdefault(graph_id, []).append(
            {
                'subject': _term_to_dict(s),
                'predicate': _term_to_dict(p),
                'object': _term_to_dict(o),
            }
        )

    return compat_dataset


def from_legacy_dataset(dataset: dict) -> Dataset:
    """
    Convert the legacy dict structure back into an rdflib.Dataset.

    :param dataset: dict mapping graph names to lists of legacy triples.

    :return: the equivalent rdflib.Dataset.

    :raises ValueError: if a graph name cannot be converted.
    """
    result = Dataset()

    for name, legacy_triples in dataset.items():
        # Resolve the graph name first so a bad name fails before any
        # triples are added.
        try:
            graph = from_legacy_graph(name, result.default_graph)
        except Exception as err:
            raise ValueError(f'Illegal graph name: {name}') from err

        for legacy_triple in legacy_triples:
            subj, pred, obj = from_legacy_triple(legacy_triple)
            result.add((subj, pred, obj, graph))

    return result

def from_legacy_graph(graph: str, default_graph = DATASET_DEFAULT_GRAPH_ID) -> URIRef | BNode:
    """
    Convert a legacy graph name into an rdflib URIRef or BNode.

    :param graph: the legacy graph name ('@default', a '_:'-prefixed blank
        node identifier, or an IRI).
    :param default_graph: the value returned for '@default'.

    :return: the rdflib term for the graph name.
    """
    if graph == '@default':
        return default_graph
    # A '_:' prefix marks a blank-node graph name; anything else is an IRI.
    return BNode(graph[2:]) if graph.startswith('_:') else URIRef(graph)

def from_legacy_triple(triple: dict, normalize=False) -> tuple:
    """
    Convert a legacy triple dict into an rdflib triple tuple.

    :param triple: dict with 'subject', 'predicate' and 'object' keys, each
        holding a component dict with 'type' and 'value' keys (literals may
        also carry 'language' and/or 'datatype').
    :param normalize: whether rdflib should normalize literal values
        (default: False).

    :return: a (subject, predicate, object) tuple of rdflib terms.

    :raises ValueError: if the triple or any component is malformed, or a
        component has an unknown type.
    """
    if not all(k in triple for k in ('subject', 'predicate', 'object')):
        raise ValueError(f'Illegal quad structure: {triple}')

    def to_node(comp):
        # Convert one legacy component dict into an rdflib term.
        if not isinstance(comp, dict) or 'type' not in comp or 'value' not in comp:
            raise ValueError(f'Illegal quad structure: {comp}')

        val = comp['value']
        if comp['type'] == 'blank node':
            # Strip '_:' because RDFLib adds it back internally
            return BNode(val[2:] if val.startswith('_:') else val)
        elif comp['type'] == 'IRI':
            return URIRef(val)
        elif comp['type'] == 'literal':
            return Literal(
                val,
                lang=comp.get('language'),
                # A language-tagged literal must not also carry an explicit
                # datatype, so the datatype is applied only when no language
                # tag is present.
                datatype=URIRef(comp['datatype'])
                if comp.get('datatype') and not comp.get('language')
                else None,
                # Don't normalize literal values to prevent datetime issues
                # TODO: this means only rdflib.Dataset() created with normalization turned off will work properly.
                normalize=normalize,
            )
        # f-string for consistency with the other error messages in this file
        # (was str.format in the original).
        raise ValueError(f"Illegal component type {comp['type']}")

    s = to_node(triple['subject'])
    p = to_node(triple['predicate'])
    o = to_node(triple['object'])

    return (s, p, o)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ aiohttp; python_version >= '3.5'
lxml
cachetools
frozendict
rdflib
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@
'cachetools',
'frozendict',
'lxml',
'rdflib',
],
extras_require={
'requests': ['requests'],
'aiohttp': ['aiohttp'],
'cachetools': ['cachetools'],
'frozendict': ['frozendict'],
'rdflib': ['rdflib'],
}
)
50 changes: 40 additions & 10 deletions tests/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,14 @@
'https://w3c.github.io/json-ld-api/tests',
'https://w3c.github.io/json-ld-framing/tests',
'https://github.com/json-ld/normalization/tests',
'https://w3c.github.io/rdf-canon/tests/vocab#'
]

SPEC_DIRS = [
'../specifications/json-ld-api/tests/',
'../specifications/json-ld-framing/tests/',
'../specifications/normalization/tests/',
'../specifications/rdf-canon/tests/'
]

# NOTE: The following TestRunner class can be removed because pytest now
Expand Down Expand Up @@ -477,6 +479,10 @@ def runTest(self):
print('ACTUAL: ', result)
raise AssertionError('results differ')
elif not self.is_negative:
# If the result is a dict and the expected value is a string,
# the expected value is probably JSON.
if isinstance(result, dict) and isinstance(expect, str):
expect = json.loads(expect)
# Perform order-independent equivalence test
if equal_unordered(result, expect):
self.assertTrue(True)
Expand Down Expand Up @@ -662,6 +668,7 @@ def create(test):
if k not in http_options:
options[k] = v
options['documentLoader'] = create_document_loader(test)
options['hashAlgorithm'] = test.data.get('hashAlgorithm')
options.update(opts or {})
if 'expandContext' in options:
filename = os.path.join(test.dirname, options['expandContext'])
Expand Down Expand Up @@ -1058,11 +1065,7 @@ def write(self, filename):
},
'rdfn:Urgna2012EvalTest': {
'pending': {'idRegex': []},
'skip': {
'idRegex': [
'.*manifest-urgna2012#test060$',
]
},
'skip': {'idRegex': []},
'fn': 'normalize',
'params': [
read_test_property('action'),
Expand All @@ -1077,11 +1080,7 @@ def write(self, filename):
},
'rdfn:Urdna2015EvalTest': {
'pending': {'idRegex': []},
'skip': {
'idRegex': [
'.*manifest-urdna2015#test060$',
]
},
'skip': {'idRegex': []},
'fn': 'normalize',
'params': [
read_test_property('action'),
Expand All @@ -1094,6 +1093,37 @@ def write(self, filename):
),
],
},
'rdfc:RDFC10EvalTest': {
'pending': {'idRegex': []},
'skip': {'idRegex': []},
'fn': 'normalize',
'params': [
read_test_property('action'),
create_test_options({
'algorithm': 'RDFC10',
'inputFormat': 'application/n-quads',
'format': 'application/n-quads'
})
]
},
'rdfc:RDFC10MapTest': {
'pending': {
'idRegex': []
},
'skip': {
'idRegex': []
},
'fn': 'normalize',
'params': [
read_test_property('action'),
create_test_options({
'algorithm': 'RDFC10',
'inputFormat': 'application/n-quads',
'format': 'application/n-quads',
'outputMap': True
})
]
}
}


Expand Down
Loading
Loading