Skip to content

Commit 74aa474

Browse files
committed
LDO shapes, v1
1 parent 583419f commit 74aa474

7 files changed

Lines changed: 1535 additions & 0 deletions

File tree

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import { namedNode } from "@ldo/rdf-utils";
2+
import { parseRdf } from "@ldo/ldo";
3+
import { toRDF, type JsonLdDocument } from "jsonld";
4+
import {
5+
ContainerProfileShapeType,
6+
ContentProfileShapeType,
7+
NodeSchemaProfileShapeType,
8+
NodeInstanceProfileShapeType,
9+
RelationInstanceProfileShapeType,
10+
RelationTripleDefProfileShapeType,
11+
RelationDefProfileShapeType,
12+
} from "./ldo/dgBase.shapeTypes";
13+
import type {
14+
ContainerProfile,
15+
ContentProfile,
16+
NodeSchemaProfile,
17+
NodeInstanceProfile,
18+
RelationInstanceProfile,
19+
RelationTripleDefProfile,
20+
RelationDefProfile,
21+
} from "./ldo/dgBase.typings";
22+
23+
/**
 * Union of the profile typings imported from `./ldo/dgBase.typings`.
 * This is the element type of the array returned by `parseJsonLd`.
 */
type ParseResult =
  // Container and content
  | ContainerProfile
  | ContentProfile
  // Node schema and its instances
  | NodeSchemaProfile
  | NodeInstanceProfile
  // Relation definitions and their instances
  | RelationDefProfile
  | RelationTripleDefProfile
  | RelationInstanceProfile;
32+
// --- RDF / RDF Schema vocabulary -------------------------------------------
const typePredicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
// `domain` is an RDF Schema term (rdfs:domain). The RDF syntax namespace
// (…/22-rdf-syntax-ns#) defines no `domain` property, so the IRI must use the
// rdf-schema namespace to match standard data.
const domainPredicate = "http://www.w3.org/2000/01/rdf-schema#domain";

// --- Discourse Graphs base schema (dg_base) --------------------------------
const nodeSchemaType = "https://discoursegraphs.com/schema/dg_base#NodeSchema";
const relationDefType =
  "https://discoursegraphs.com/schema/dg_base#RelationDef";
const sourcePredicate = "https://discoursegraphs.com/schema/dg_base#source";

// --- SIOC / Dublin Core ----------------------------------------------------
const contentPredicate = "http://rdfs.org/sioc/ns#content";
const containerType = "http://rdfs.org/sioc/ns#Container";
const descriptionPredicate = "http://purl.org/dc/elements/1.1/description";
41+
42+
/**
 * Parses a JSON-LD document and wraps each subject it contains in the LDO
 * profile shape that best describes it.
 *
 * The document is serialized to N-Quads with `toRDF`, loaded into an LDO
 * dataset with `parseRdf`, and every distinct subject is then classified by
 * the first matching rule, in this order:
 *
 * 1. typed as a container                       -> ContainerProfile
 * 2. typed as a node schema                     -> NodeSchemaProfile
 * 3. typed as a relation definition             -> RelationTripleDefProfile
 *    when it has a domain statement, RelationDefProfile otherwise
 * 4. has a content statement                    -> ContentProfile
 * 5. one of its types is itself typed as a
 *    relation definition / node schema          -> Relation/NodeInstanceProfile
 * 6. heuristics: has a source statement         -> RelationInstanceProfile,
 *    has a description statement                -> NodeInstanceProfile
 *
 * Subjects that match no rule are reported with `console.error` and omitted
 * from the result.
 *
 * @param data - The JSON-LD document to parse.
 * @param baseIRI - Forwarded to `parseRdf` as its `baseIRI` option.
 * @returns One profile object per subject that could be classified, in order
 *   of each subject's first appearance in the dataset.
 */
export const parseJsonLd = async (
  data: JsonLdDocument,
  baseIRI: string,
): Promise<ParseResult[]> => {
  const asQuads = (await toRDF(data, {
    format: "application/n-quads",
  })) as string;
  const ldoDataset = await parseRdf(asQuads, { baseIRI });

  // Collect distinct subject *terms*. Collapsing them to their string value
  // and re-wrapping with namedNode() would turn blank-node subjects into
  // unrelated named nodes, so no quad would ever match them again.
  const seenSubjects = new Set<string>();
  const subjects = ldoDataset.toArray().flatMap((quad) => {
    const term = quad.subject;
    if (term.termType !== "NamedNode" && term.termType !== "BlankNode") {
      return [];
    }
    if (seenSubjects.has(term.value)) return [];
    seenSubjects.add(term.value);
    return [term];
  });

  // subject value -> values of all its rdf:type objects
  const typeMap = new Map<string, string[]>();
  for (const quad of ldoDataset
    .match(null, namedNode(typePredicate))
    .toArray()) {
    const known = typeMap.get(quad.subject.value);
    if (known) known.push(quad.object.value);
    else typeMap.set(quad.subject.value, [quad.object.value]);
  }

  // True when the dataset holds at least one quad `subject predicate ?o`.
  const hasPredicate = (
    subject: (typeof subjects)[number],
    predicate: string,
  ): boolean => ldoDataset.match(subject, namedNode(predicate)).size > 0;

  const result: ParseResult[] = [];
  for (const subject of subjects) {
    const declaredTypes = typeMap.get(subject.value) ?? [];
    const types = new Set(declaredTypes);

    if (types.has(containerType)) {
      result.push(
        ldoDataset.usingType(ContainerProfileShapeType).fromSubject(subject),
      );
      continue;
    }
    if (types.has(nodeSchemaType)) {
      result.push(
        ldoDataset.usingType(NodeSchemaProfileShapeType).fromSubject(subject),
      );
      continue;
    }
    if (types.has(relationDefType)) {
      // A relation definition that states a domain is the "triple" flavour.
      if (hasPredicate(subject, domainPredicate)) {
        result.push(
          ldoDataset
            .usingType(RelationTripleDefProfileShapeType)
            .fromSubject(subject),
        );
      } else {
        result.push(
          ldoDataset
            .usingType(RelationDefProfileShapeType)
            .fromSubject(subject),
        );
      }
      continue;
    }
    // NOTE(review): this check runs before the type-of-type lookup below, so
    // a typed instance that also carries a content statement is reported as
    // ContentProfile — confirm this precedence is intended.
    if (hasPredicate(subject, contentPredicate)) {
      result.push(
        ldoDataset.usingType(ContentProfileShapeType).fromSubject(subject),
      );
      continue;
    }

    // happy path: the subject's types are themselves described in the data
    const typesOfTypes = new Set(
      declaredTypes.flatMap((type) => typeMap.get(type) ?? []),
    );
    if (typesOfTypes.has(relationDefType)) {
      result.push(
        ldoDataset
          .usingType(RelationInstanceProfileShapeType)
          .fromSubject(subject),
      );
      continue;
    }
    if (typesOfTypes.has(nodeSchemaType)) {
      result.push(
        ldoDataset.usingType(NodeInstanceProfileShapeType).fromSubject(subject),
      );
      continue;
    }

    // otherwise use heuristics based on which predicates are present
    if (hasPredicate(subject, sourcePredicate)) {
      result.push(
        ldoDataset
          .usingType(RelationInstanceProfileShapeType)
          .fromSubject(subject),
      );
      continue;
    }
    if (hasPredicate(subject, descriptionPredicate)) {
      result.push(
        ldoDataset.usingType(NodeInstanceProfileShapeType).fromSubject(subject),
      );
      continue;
    }

    console.error("Could not interpret ", subject.value);
  }
  return result;
};

0 commit comments

Comments
 (0)