diff --git a/formal-semantics.md b/formal-semantics.md index c5b81b7..52c2c49 100644 --- a/formal-semantics.md +++ b/formal-semantics.md @@ -258,6 +258,24 @@ Abstract: URI × Maybe URI → Any Python: def execute(self, url: URIRef, block: URIRef = None) -> Any ``` +**ldh-GenerateOntologyViews** - Generate LDH views (`ldh:view`) and SPIN `sp:Select` queries for each non-`owl:FunctionalProperty` `owl:DatatypeProperty`/`owl:ObjectProperty` in an ontology graph +``` +Abstract: Graph × URI × URI → Graph +Python: def execute(self, ontology: rdflib.Graph, base_uri: URIRef, service_uri: URIRef) -> rdflib.Graph +``` + +**ldh-GenerateClassContainers** - Create an LDH container per `owl:Class` in an ontology graph (each with a SPARQL service and instance-list view) +``` +Abstract: Graph × URI × URI → Result +Python: def execute(self, ontology: rdflib.Graph, parent_container: URIRef, endpoint: URIRef) -> Result +``` + +**ldh-GeneratePortal** - End-to-end portal generation; composes `ExtractOntology`, `ldh-GenerateOntologyViews`, `POST`, and `ldh-GenerateClassContainers` +``` +Abstract: URI × URI × URI → Result +Python: def execute(self, endpoint: URIRef, ontology_namespace: URIRef, parent_container: URIRef) -> Result +``` + ### Schema Operations **ExtractClasses** - Extract RDF classes from graph @@ -278,6 +296,12 @@ Abstract: URI → Graph Python: def execute(self, endpoint: URIRef) -> rdflib.Graph ``` +**ExtractOntology** - Extract a full ontology (classes + datatype + object properties) from a SPARQL endpoint as a single graph +``` +Abstract: URI → Graph +Python: def execute(self, endpoint: URIRef) -> rdflib.Graph +``` + ### Utility Operations **Merge** - Merge multiple RDF graphs into one diff --git a/src/web_algebra/operations/linkeddatahub/content/generate_class_containers.py b/src/web_algebra/operations/linkeddatahub/content/generate_class_containers.py index 948f7d2..837653a 100644 --- a/src/web_algebra/operations/linkeddatahub/content/generate_class_containers.py +++ 
b/src/web_algebra/operations/linkeddatahub/content/generate_class_containers.py @@ -60,6 +60,18 @@ def execute(self, ontology: Graph, parent_container: URIRef, endpoint: URIRef) - Returns: Concatenated Result containing all operation results (CreateContainer + AddGenericService + POST bindings) """ + if not isinstance(ontology, Graph): + raise TypeError( + f"GenerateClassContainers operation expects 'ontology' to be Graph, got {type(ontology)}" + ) + if not isinstance(parent_container, URIRef): + raise TypeError( + f"GenerateClassContainers operation expects 'parent_container' to be URIRef, got {type(parent_container)}" + ) + if not isinstance(endpoint, URIRef): + raise TypeError( + f"GenerateClassContainers operation expects 'endpoint' to be URIRef, got {type(endpoint)}" + ) # Define namespaces LDH = Namespace("https://w3id.org/atomgraph/linkeddatahub#") SP = Namespace("http://spinrdf.org/sp#") diff --git a/src/web_algebra/operations/linkeddatahub/content/generate_ontology_views.py b/src/web_algebra/operations/linkeddatahub/content/generate_ontology_views.py index b3e2d27..010a342 100644 --- a/src/web_algebra/operations/linkeddatahub/content/generate_ontology_views.py +++ b/src/web_algebra/operations/linkeddatahub/content/generate_ontology_views.py @@ -1,18 +1,19 @@ +import hashlib from rdflib import URIRef, Literal, Namespace, Graph from rdflib.namespace import RDF, RDFS, XSD, DCTERMS from web_algebra.operation import Operation class GenerateOntologyViews(Operation): - """Generates LinkedDataHub view templates for non-functional properties. + """Generates LinkedDataHub views for ontology properties. 
Takes an extracted ontology graph and generates an RDF graph containing: - ldh:View resources for each non-functional property - SPIN sp:Select queries for retrieving related resources - - ldh:template links from classes to views + - ldh:view links from properties to views - A property is considered non-functional if it does not have a - owl:maxQualifiedCardinality restriction of 1. + Functional properties (declared `owl:FunctionalProperty`) are skipped: + they yield at most one value, so a table view would be redundant. """ @classmethod @@ -21,7 +22,7 @@ def name(cls): @classmethod def description(cls) -> str: - return "Generates LinkedDataHub view templates and SPIN queries for non-functional properties" + return "Generates LinkedDataHub views and SPIN queries for ontology properties (excluding owl:FunctionalProperty)" @classmethod def inputSchema(cls) -> dict: @@ -45,45 +46,49 @@ def inputSchema(cls) -> dict: } def execute(self, ontology: Graph, base_uri: URIRef, service_uri: URIRef) -> Graph: - """Generate LDH view templates for non-functional properties + """Generate LDH views for ontology properties Args: - ontology: RDF graph containing classes and properties with optional restrictions + ontology: RDF graph containing property declarations base_uri: Base URI for generating view and query resource URIs service_uri: URI of the sd:Service resource to be referenced by queries Returns: - RDF graph containing ldh:View, sp:Select, and ldh:template triples + RDF graph containing ldh:View, sp:Select, and ldh:view triples """ + if not isinstance(ontology, Graph): + raise TypeError( + f"GenerateOntologyViews operation expects 'ontology' to be Graph, got {type(ontology)}" + ) + if not isinstance(base_uri, URIRef): + raise TypeError( + f"GenerateOntologyViews operation expects 'base_uri' to be URIRef, got {type(base_uri)}" + ) + if not isinstance(service_uri, URIRef): + raise TypeError( + f"GenerateOntologyViews operation expects 'service_uri' to be URIRef, got 
{type(service_uri)}" + ) + # Define namespaces LDH = Namespace("https://w3id.org/atomgraph/linkeddatahub#") SP = Namespace("http://spinrdf.org/sp#") SPIN = Namespace("http://spinrdf.org/spin#") AC = Namespace("https://w3id.org/atomgraph/client#") - # Query to find all non-functional properties with their classes + # Find all distinct datatype/object properties that are not owl:FunctionalProperty. + # Views attach to properties (LDH `ldh:view` has rdfs:domain rdf:Property), so we + # iterate by property rather than by (class, property) pair. query = """ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> - PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> - SELECT DISTINCT ?class ?property ?propertyType ?range + SELECT DISTINCT ?property ?propertyType WHERE { - # Get all properties with their domain - ?property a ?propertyType ; - rdfs:domain ?class ; - rdfs:range ?range . + ?property a ?propertyType . FILTER(?propertyType IN (owl:DatatypeProperty, owl:ObjectProperty)) - - # Exclude functional properties (those with maxQualifiedCardinality = 1) - FILTER NOT EXISTS { - ?class rdfs:subClassOf ?restriction . - ?restriction a owl:Restriction ; - owl:onProperty ?property ; - owl:maxQualifiedCardinality 1 . 
- } + FILTER NOT EXISTS { ?property a owl:FunctionalProperty } } - ORDER BY ?class ?property + ORDER BY ?property """ results = ontology.query(query) @@ -98,48 +103,42 @@ def execute(self, ontology: Graph, base_uri: URIRef, service_uri: URIRef) -> Gra g.bind("rdfs", RDFS) g.bind("rdf", RDF) - # Generate views and queries for each non-functional property + seen_locals: set[str] = set() + for row in results: row_dict = row.asdict() - class_uri = row_dict["class"] property_uri = row_dict["property"] property_type = row_dict["propertyType"] - range_uri = row_dict["range"] - # Validate that all values are URIRefs - if not isinstance(class_uri, URIRef): - raise TypeError(f"Expected class to be URIRef, got {type(class_uri)}") if not isinstance(property_uri, URIRef): raise TypeError(f"Expected property to be URIRef, got {type(property_uri)}") if not isinstance(property_type, URIRef): raise TypeError(f"Expected propertyType to be URIRef, got {type(property_type)}") - if not isinstance(range_uri, URIRef): - raise TypeError(f"Expected range to be URIRef, got {type(range_uri)}") - # Extract local names for URIs - class_local = self._get_local_name(class_uri) + # Disambiguate when two properties share a local name (different namespaces). 
property_local = self._get_local_name(property_uri) + if property_local in seen_locals: + suffix = hashlib.sha1(str(property_uri).encode()).hexdigest()[:6] + property_local = f"{property_local}_{suffix}" + seen_locals.add(property_local) - # Generate URIs for view and query - view_uri = URIRef(f"{base_uri}#{class_local}_{property_local}_View") - query_uri = URIRef(f"{base_uri}#{class_local}_{property_local}_Query") + view_uri = URIRef(f"{base_uri}#{property_local}_View") + query_uri = URIRef(f"{base_uri}#{property_local}_Query") - # Generate human-readable title title = f"{property_local}" + sparql_text = self._generate_sparql_query(property_uri) - # Generate SPARQL query text - sparql_text = self._generate_sparql_query(property_uri, property_type, range_uri) - - # Create ldh:template link from class to view - g.add((class_uri, LDH.template, view_uri)) + # Attach view to property via ldh:view (forward direction). + # TODO: emit ldh:inverseView for selected object properties in a follow-up. 
+ g.add((property_uri, LDH.view, view_uri)) - # Create ldh:View resource + # ldh:View resource g.add((view_uri, RDF.type, LDH.View)) g.add((view_uri, DCTERMS.title, Literal(title))) g.add((view_uri, SPIN.query, query_uri)) g.add((view_uri, AC.mode, AC.TableMode)) - # Create sp:Select query resource + # sp:Select query resource g.add((query_uri, RDF.type, SP.Select)) g.add((query_uri, DCTERMS.title, Literal(f"Select {property_local}"))) g.add((query_uri, RDFS.label, Literal(f"Select {property_local}"))) @@ -157,8 +156,8 @@ def _get_local_name(self, uri: URIRef) -> str: return uri_str.split('/')[-1] return uri_str - def _generate_sparql_query(self, property_uri: URIRef, property_type: URIRef, range_uri: URIRef) -> str: - """Generate SPARQL SELECT query for a property""" + def _generate_sparql_query(self, property_uri: URIRef) -> str: + """Generate SPARQL SELECT query for a property (forward direction)""" sparql = f"""SELECT DISTINCT ?related ?label WHERE {{ GRAPH ?relatedGraph {{ diff --git a/src/web_algebra/operations/linkeddatahub/content/generate_portal.py b/src/web_algebra/operations/linkeddatahub/content/generate_portal.py index 36e2852..711494e 100644 --- a/src/web_algebra/operations/linkeddatahub/content/generate_portal.py +++ b/src/web_algebra/operations/linkeddatahub/content/generate_portal.py @@ -51,6 +51,19 @@ def execute(self, endpoint: URIRef, ontology_namespace: URIRef, parent_container Returns: Concatenated Result containing all operation results """ + if not isinstance(endpoint, URIRef): + raise TypeError( + f"GeneratePortal operation expects 'endpoint' to be URIRef, got {type(endpoint)}" + ) + if not isinstance(ontology_namespace, URIRef): + raise TypeError( + f"GeneratePortal operation expects 'ontology_namespace' to be URIRef, got {type(ontology_namespace)}" + ) + if not isinstance(parent_container, URIRef): + raise TypeError( + f"GeneratePortal operation expects 'parent_container' to be URIRef, got {type(parent_container)}" + ) + import logging # 
Step 0: Create service resource for the SPARQL endpoint diff --git a/src/web_algebra/operations/schema/extract_ontology.py b/src/web_algebra/operations/schema/extract_ontology.py index 945b176..055fff7 100644 --- a/src/web_algebra/operations/schema/extract_ontology.py +++ b/src/web_algebra/operations/schema/extract_ontology.py @@ -27,6 +27,10 @@ def inputSchema(cls) -> dict: def execute(self, endpoint: URIRef) -> Graph: """Extract complete ontology by composing individual extraction operations""" + if not isinstance(endpoint, URIRef): + raise TypeError( + f"ExtractOntology operation expects 'endpoint' to be URIRef, got {type(endpoint)}" + ) # Extract classes classes_graph = ExtractClasses(settings=self.settings, context=self.context).execute(endpoint) diff --git a/tests/unit/test_extract_ontology.py b/tests/unit/test_extract_ontology.py new file mode 100644 index 0000000..52c9fd7 --- /dev/null +++ b/tests/unit/test_extract_ontology.py @@ -0,0 +1,29 @@ +"""Spec: formal-semantics.md "ExtractOntology - Extract a full ontology (classes ++ datatype + object properties) from a SPARQL endpoint as a single graph" +Abstract: URI → Graph +Python: def execute(self, endpoint: URIRef) -> rdflib.Graph +""" + +from __future__ import annotations + +import pytest +from rdflib import Literal + +from web_algebra.operation import Operation + + +class TestExtractOntologyPure: + def test_wrong_input_type_raises(self, settings): + op = Operation.get("ExtractOntology")(settings=settings) + with pytest.raises(TypeError): + op.execute(Literal("not-a-uri")) + + @pytest.mark.skip(reason="UNCLEAR(spec): is the URI a SPARQL endpoint, document URL, or ontology IRI? 
— narrative omits this") + def test_happy_path(self, settings): + pass + + +class TestExtractOntologyJson: + @pytest.mark.skip(reason="UNCLEAR(spec): JSON arg key for ExtractOntology not given by spec or existing fixtures") + def test_json_dispatch(self, settings): + pass diff --git a/tests/unit/test_ldh_generate_class_containers.py b/tests/unit/test_ldh_generate_class_containers.py new file mode 100644 index 0000000..a6026f2 --- /dev/null +++ b/tests/unit/test_ldh_generate_class_containers.py @@ -0,0 +1,47 @@ +"""Spec: formal-semantics.md "ldh-GenerateClassContainers - Create an LDH +container per `owl:Class` in an ontology graph (each with a SPARQL service and +instance-list view)" +Abstract: Graph × URI × URI → Result +Python: def execute(self, ontology: rdflib.Graph, parent_container: URIRef, endpoint: URIRef) -> Result +""" + +from __future__ import annotations + +import pytest +from rdflib import Graph, Literal, URIRef + +from web_algebra.operation import Operation + + +PARENT = URIRef("http://example.org/portal/") +ENDPOINT = URIRef("http://example.org/sparql") + + +class TestLDHGenerateClassContainersPure: + def test_wrong_ontology_type_raises(self, settings): + op = Operation.get("ldh-GenerateClassContainers")(settings=settings) + with pytest.raises(TypeError): + op.execute(Literal("not-a-graph"), PARENT, ENDPOINT) + + def test_wrong_parent_container_type_raises(self, settings): + op = Operation.get("ldh-GenerateClassContainers")(settings=settings) + with pytest.raises(TypeError): + op.execute(Graph(), Literal("not-a-uri"), ENDPOINT) + + def test_wrong_endpoint_type_raises(self, settings): + op = Operation.get("ldh-GenerateClassContainers")(settings=settings) + with pytest.raises(TypeError): + op.execute(Graph(), PARENT, Literal("not-a-uri")) + + +@pytest.mark.ldh +class TestLDHGenerateClassContainersLive: + @pytest.mark.skip(reason="UNCLEAR(spec): return type `Result` shape — what's a meaningful assertion for a side-effecting orchestration?") + def 
test_basic(self, settings_with_auth): + pass + + +class TestLDHGenerateClassContainersJson: + @pytest.mark.skip(reason="UNCLEAR(spec): JSON arg keys for ldh-GenerateClassContainers not given by spec or existing fixtures") + def test_json_dispatch(self, settings): + pass diff --git a/tests/unit/test_ldh_generate_ontology_views.py b/tests/unit/test_ldh_generate_ontology_views.py new file mode 100644 index 0000000..2b380e8 --- /dev/null +++ b/tests/unit/test_ldh_generate_ontology_views.py @@ -0,0 +1,80 @@ +"""Spec: formal-semantics.md "ldh-GenerateOntologyViews - Generate LDH views +(`ldh:view`) and SPIN `sp:Select` queries for each non-`owl:FunctionalProperty` +`owl:DatatypeProperty`/`owl:ObjectProperty` in an ontology graph" +Abstract: Graph × URI × URI → Graph +Python: def execute(self, ontology: rdflib.Graph, base_uri: URIRef, service_uri: URIRef) -> rdflib.Graph +""" + +from __future__ import annotations + +import pytest +from rdflib import Graph, Literal, Namespace, URIRef +from rdflib.namespace import OWL, RDF + +from web_algebra.operation import Operation + + +LDH = Namespace("https://w3id.org/atomgraph/linkeddatahub#") +EX = Namespace("http://example.org/ns#") +BASE = URIRef("http://example.org/portal/") +SERVICE = URIRef("http://example.org/portal/#Service") + + +class TestLDHGenerateOntologyViewsPure: + def test_wrong_ontology_type_raises(self, settings): + op = Operation.get("ldh-GenerateOntologyViews")(settings=settings) + with pytest.raises(TypeError): + op.execute(Literal("not-a-graph"), BASE, SERVICE) + + def test_wrong_base_uri_type_raises(self, settings): + op = Operation.get("ldh-GenerateOntologyViews")(settings=settings) + with pytest.raises(TypeError): + op.execute(Graph(), Literal("not-a-uri"), SERVICE) + + def test_wrong_service_uri_type_raises(self, settings): + op = Operation.get("ldh-GenerateOntologyViews")(settings=settings) + with pytest.raises(TypeError): + op.execute(Graph(), BASE, Literal("not-a-uri")) + + def 
test_emits_ldh_view_for_non_functional_property(self, settings): + """Spec: emits a view per non-`owl:FunctionalProperty` object/datatype property.""" + ontology = Graph() + ontology.add((EX.knows, RDF.type, OWL.ObjectProperty)) + + op = Operation.get("ldh-GenerateOntologyViews")(settings=settings) + out = op.execute(ontology, BASE, SERVICE) + + assert isinstance(out, Graph) + views = list(out.triples((EX.knows, LDH.view, None))) + assert len(views) == 1, f"expected one ldh:view triple for ex:knows, got {len(views)}" + + def test_skips_functional_property(self, settings): + """Spec: properties declared `owl:FunctionalProperty` are excluded.""" + ontology = Graph() + ontology.add((EX.ssn, RDF.type, OWL.DatatypeProperty)) + ontology.add((EX.ssn, RDF.type, OWL.FunctionalProperty)) + + op = Operation.get("ldh-GenerateOntologyViews")(settings=settings) + out = op.execute(ontology, BASE, SERVICE) + + views = list(out.triples((EX.ssn, LDH.view, None))) + assert len(views) == 0, "functional property must not get a view" + + def test_no_ldh_template_in_output(self, settings): + """Spec phrases the output predicate as `ldh:view` — `ldh:template` (the + previous LDH vocabulary, since removed) must not appear.""" + ontology = Graph() + ontology.add((EX.knows, RDF.type, OWL.ObjectProperty)) + ontology.add((EX.name, RDF.type, OWL.DatatypeProperty)) + + op = Operation.get("ldh-GenerateOntologyViews")(settings=settings) + out = op.execute(ontology, BASE, SERVICE) + + legacy = list(out.triples((None, LDH.template, None))) + assert legacy == [], "output must contain no `ldh:template` triples" + + +class TestLDHGenerateOntologyViewsJson: + @pytest.mark.skip(reason="UNCLEAR(spec): JSON arg keys for ldh-GenerateOntologyViews not given by spec or existing fixtures") + def test_json_dispatch(self, settings): + pass diff --git a/tests/unit/test_ldh_generate_portal.py b/tests/unit/test_ldh_generate_portal.py new file mode 100644 index 0000000..d71447c --- /dev/null +++ 
b/tests/unit/test_ldh_generate_portal.py @@ -0,0 +1,48 @@ +"""Spec: formal-semantics.md "ldh-GeneratePortal - End-to-end portal generation; +composes `ExtractOntology`, `ldh-GenerateOntologyViews`, `POST`, and +`ldh-GenerateClassContainers`" +Abstract: URI × URI × URI → Result +Python: def execute(self, endpoint: URIRef, ontology_namespace: URIRef, parent_container: URIRef) -> Result +""" + +from __future__ import annotations + +import pytest +from rdflib import Literal, URIRef + +from web_algebra.operation import Operation + + +ENDPOINT = URIRef("http://example.org/sparql") +ONTOLOGY_NS = URIRef("http://example.org/ontology/") +PARENT = URIRef("http://example.org/portal/") + + +class TestLDHGeneratePortalPure: + def test_wrong_endpoint_type_raises(self, settings): + op = Operation.get("ldh-GeneratePortal")(settings=settings) + with pytest.raises(TypeError): + op.execute(Literal("not-a-uri"), ONTOLOGY_NS, PARENT) + + def test_wrong_ontology_namespace_type_raises(self, settings): + op = Operation.get("ldh-GeneratePortal")(settings=settings) + with pytest.raises(TypeError): + op.execute(ENDPOINT, Literal("not-a-uri"), PARENT) + + def test_wrong_parent_container_type_raises(self, settings): + op = Operation.get("ldh-GeneratePortal")(settings=settings) + with pytest.raises(TypeError): + op.execute(ENDPOINT, ONTOLOGY_NS, Literal("not-a-uri")) + + +@pytest.mark.ldh +class TestLDHGeneratePortalLive: + @pytest.mark.skip(reason="UNCLEAR(spec): return type `Result` shape — what's a meaningful assertion for end-to-end portal generation?") + def test_basic(self, settings_with_auth): + pass + + +class TestLDHGeneratePortalJson: + @pytest.mark.skip(reason="UNCLEAR(spec): JSON arg keys for ldh-GeneratePortal not given by spec or existing fixtures") + def test_json_dispatch(self, settings): + pass