Skip to content

Commit 1d5a34c

Browse files
committed
test fixes
1 parent b1fc4e4 commit 1d5a34c

14 files changed

Lines changed: 627 additions & 251 deletions

README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -587,9 +587,25 @@ internal static class ShaclValidationDemo
587587
}
588588
```
589589

590-
`ValidateShacl()` uses default Markdown-LD Knowledge Bank shapes backed by `dotNetRdf.Shacl`. The default shapes validate article names, entity names, `schema:sameAs` IRIs, provenance IRIs, and assertion confidence metadata.
590+
`ValidateShacl()` uses default Markdown-LD Knowledge Bank shapes backed by `dotNetRdf.Shacl`. The default shapes validate article names, entity names, `schema:sameAs` IRIs, provenance IRIs, and assertion confidence metadata when reified assertion metadata is present.
591591

592-
Graph assertions remain direct RDF edges for existing SPARQL and search callers. Each assertion also gets RDF reification metadata as an `rdf:Statement` with `rdf:subject`, `rdf:predicate`, `rdf:object`, `kb:confidence`, and optional `prov:wasDerivedFrom`, so SHACL can validate assertion metadata without changing the query shape of the main graph.
592+
Graph assertions always remain direct RDF edges for existing SPARQL and search callers. Reified assertion metadata is now opt-in, because emitting it trades build throughput for queryable assertion-level metadata:
593+
594+
- default builds keep only the direct RDF edges, which is the fast path for large Markdown corpora and tokenized graphs
595+
- opt in to RDF reification when the caller needs `rdf:Statement` metadata with `rdf:subject`, `rdf:predicate`, `rdf:object`, `kb:confidence`, and optional `prov:wasDerivedFrom`
596+
597+
Use `KnowledgeGraphBuildOptions.IncludeAssertionReification = true` when assertion-level provenance and confidence triples must be queryable:
598+
599+
```csharp
600+
var pipeline = new MarkdownKnowledgePipeline(new MarkdownKnowledgePipelineOptions
601+
{
602+
BaseUri = new Uri("https://kb.example/"),
603+
BuildOptions = new KnowledgeGraphBuildOptions
604+
{
605+
IncludeAssertionReification = true,
606+
},
607+
});
608+
```
593609

594610
Pass custom Turtle shapes when the host application needs stricter rules:
595611

docs/Features/GraphShaclValidation.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@ The feature uses `dotNetRdf.Shacl` over the in-memory `KnowledgeGraph`. It does
1414
flowchart LR
1515
Markdown["Markdown documents"] --> Pipeline["MarkdownKnowledgePipeline"]
1616
Pipeline --> Facts["Merged entities and assertions"]
17-
Facts --> Reification["Direct RDF edges plus rdf:Statement metadata"]
18-
Reification --> Graph["KnowledgeGraph"]
17+
Facts --> Graph["KnowledgeGraph"]
18+
Graph --> Reification["Optional rdf:Statement metadata"]
1919
Shapes["Default or caller SHACL shapes"] --> Validator["dotNetRDF ShapesGraph"]
2020
Graph --> Validator
21+
Reification --> Validator
2122
Validator --> Report["KnowledgeGraphShaclValidationReport"]
2223
Report --> Caller["Caller"]
2324
```
@@ -30,18 +31,18 @@ The built-in shapes graph validates:
3031
- common entity classes have `schema:name`.
3132
- `schema:sameAs` values are IRIs.
3233
- `prov:wasDerivedFrom` values are IRIs.
33-
- reified `rdf:Statement` assertion metadata has one IRI subject, predicate, object, and a decimal `kb:confidence` from 0 through 1.
34+
- reified `rdf:Statement` assertion metadata, when included, has one IRI subject, one IRI predicate, one IRI object, and a decimal `kb:confidence` from 0 through 1.
3435

3536
Callers can pass custom Turtle SHACL shapes to `KnowledgeGraph.ValidateShacl(shapesTurtle)` or `MarkdownKnowledgeBuildResult.ValidateShacl(shapesTurtle)`.
3637

3738
## Assertion Metadata
3839

39-
Graph assertions remain direct RDF edges for existing SPARQL/search callers. Each assertion also receives RDF reification metadata:
40+
Graph assertions remain direct RDF edges for existing SPARQL/search callers. RDF reification metadata is optional and controlled by `KnowledgeGraphBuildOptions.IncludeAssertionReification`.
4041

4142
```mermaid
4243
flowchart TB
4344
Subject["subject IRI"] -->|"direct predicate"| Object["object IRI"]
44-
Statement["blank rdf:Statement"] -->|"rdf:subject"| Subject
45+
Statement["optional blank rdf:Statement"] -->|"rdf:subject"| Subject
4546
Statement -->|"rdf:predicate"| Predicate["predicate IRI"]
4647
Statement -->|"rdf:object"| Object
4748
Statement -->|"kb:confidence"| Confidence["xsd:decimal"]

src/MarkdownLd.Kb/Graph/Build/KnowledgeGraphBuilder.cs

Lines changed: 76 additions & 142 deletions
Large diffs are not rendered by default.
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
using System.Globalization;
2+
using VDS.RDF;
3+
using static ManagedCode.MarkdownLd.Kb.Pipeline.PipelineConstants;
4+
5+
namespace ManagedCode.MarkdownLd.Kb.Pipeline;
6+
7+
/// <summary>
/// Caches the RDF nodes and resolved predicate/type URIs created while materializing one
/// <see cref="VDS.RDF.Graph"/>, so repeated values reuse the same node instance instead of
/// asking the graph to create a new one each time.
/// </summary>
/// <param name="graph">The graph every node produced by this context is created on.</param>
internal sealed class KnowledgeGraphMaterializationContext(Graph graph)
{
    // Conservative bound below decimal.MaxValue (~7.9e28) so the double -> decimal cast cannot overflow.
    private const double MaxDecimalConvertibleMagnitude = 1e28;

    private readonly Dictionary<string, IUriNode> _uriNodes = new(StringComparer.Ordinal);
    private readonly Dictionary<string, ILiteralNode> _literalNodes = new(StringComparer.Ordinal);
    private readonly Dictionary<TypedLiteralKey, ILiteralNode> _typedLiteralNodes = [];

    // Keys are compared ordinally: only the prefix (or a bare predicate keyword) is resolved
    // case-insensitively, while the local part of "prefix:Local" is copied into the IRI as written.
    // A case-insensitive cache would hand "schema:Name" the IRI previously resolved for "schema:name".
    private readonly Dictionary<string, Uri?> _predicateUris = new(StringComparer.Ordinal);
    private readonly Dictionary<string, Uri> _typeUris = new(StringComparer.Ordinal);

    /// <summary>Gets the graph that owns every node created through this context.</summary>
    public Graph Graph { get; } = graph;

    /// <summary>Returns the cached URI node for <paramref name="uri"/>, creating it on first use.</summary>
    /// <param name="uri">An absolute URI; its <see cref="Uri.AbsoluteUri"/> text is the cache key.</param>
    public IUriNode UriNode(Uri uri)
    {
        var key = uri.AbsoluteUri;
        if (_uriNodes.TryGetValue(key, out var cached))
        {
            return cached;
        }

        var created = Graph.CreateUriNode(uri);
        _uriNodes[key] = created;
        return created;
    }

    /// <summary>Returns the cached plain literal node for <paramref name="value"/>, creating it on first use.</summary>
    public ILiteralNode LiteralNode(string value)
    {
        if (_literalNodes.TryGetValue(value, out var cached))
        {
            return cached;
        }

        var created = Graph.CreateLiteralNode(value);
        _literalNodes[value] = created;
        return created;
    }

    /// <summary>
    /// Returns the cached typed literal node for the (<paramref name="value"/>, <paramref name="datatype"/>)
    /// pair, creating it on first use.
    /// </summary>
    public ILiteralNode TypedLiteral(string value, Uri datatype)
    {
        var key = new TypedLiteralKey(value, datatype.AbsoluteUri);
        if (_typedLiteralNodes.TryGetValue(key, out var cached))
        {
            return cached;
        }

        var created = Graph.CreateLiteralNode(value, datatype);
        _typedLiteralNodes[key] = created;
        return created;
    }

    /// <summary>
    /// Returns a URI node when <paramref name="value"/> parses as an absolute URI, otherwise a plain literal node.
    /// </summary>
    public INode UriOrLiteralNode(string value)
    {
        // NOTE(review): Uri.TryCreate accepts rooted paths such as "/docs/intro" as absolute file URIs
        // on Unix-like hosts; confirm callers never pass such values expecting a literal.
        return Uri.TryCreate(value, UriKind.Absolute, out var absolute)
            ? UriNode(absolute)
            : LiteralNode(value);
    }

    /// <summary>Returns a literal node typed with <c>XsdDecimalUri</c> holding <paramref name="confidence"/>.</summary>
    public ILiteralNode ConfidenceLiteral(double confidence)
    {
        return TypedLiteral(FormatDecimal(confidence), XsdDecimalUri);
    }

    /// <summary>
    /// Returns a literal typed with <c>XsdDateUri</c> when <paramref name="value"/> parses as a date,
    /// otherwise a plain literal of the raw text (empty when <paramref name="value"/> is null).
    /// </summary>
    public ILiteralNode DateLiteral(string? value)
    {
        // Parse with the invariant culture so the same input produces the same triple on every host.
        return DateOnly.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOnly)
            ? TypedLiteral(dateOnly.ToString(DotNetDateFormat, CultureInfo.InvariantCulture), XsdDateUri)
            : LiteralNode(value ?? string.Empty);
    }

    /// <summary>
    /// Resolves a predicate name to its URI, caching the outcome (including unresolvable names).
    /// </summary>
    /// <returns>The resolved URI, or <see langword="null"/> when the name cannot be resolved.</returns>
    public Uri? ResolvePredicateUri(string predicate)
    {
        if (_predicateUris.TryGetValue(predicate, out var cached))
        {
            return cached;
        }

        var resolved = ResolvePredicate(predicate);
        _predicateUris[predicate] = resolved;
        return resolved;
    }

    /// <summary>Resolves a type name to its URI, caching the outcome.</summary>
    public Uri ResolveTypeUri(string type)
    {
        if (_typeUris.TryGetValue(type, out var cached))
        {
            return cached;
        }

        var resolved = NormalizeTypeUri(type);
        _typeUris[type] = resolved;
        return resolved;
    }

    // Formats a double without exponent notation: the default double formatting yields strings such as
    // "1E-05", which is not a valid xsd:decimal lexical form. Non-finite or out-of-range values cannot
    // be represented as decimal at all, so they keep the previous invariant formatting.
    private static string FormatDecimal(double value)
    {
        if (!double.IsFinite(value) || Math.Abs(value) >= MaxDecimalConvertibleMagnitude)
        {
            return value.ToString(CultureInfo.InvariantCulture);
        }

        return ((decimal)value).ToString(CultureInfo.InvariantCulture);
    }

    // Resolution order: known "prefix:local" names, then absolute URIs, then bare predicate keywords.
    private static Uri? ResolvePredicate(string predicate)
    {
        var separatorIndex = predicate.IndexOf(':');
        if (separatorIndex >= 0)
        {
            var prefix = predicate[..separatorIndex];
            var local = predicate[(separatorIndex + 1)..];
            return prefix.ToLowerInvariant() switch
            {
                SchemaPrefix => new Uri(SchemaNamespaceText + local),
                KbPrefix => new Uri(KbNamespaceText + local),
                ProvPrefix => new Uri(ProvNamespaceText + local),
                RdfPrefix => new Uri(RdfNamespaceText + local),
                RdfsPrefix => new Uri(RdfsNamespaceText + local),
                OwlPrefix => new Uri(OwlNamespaceText + local),
                SkosPrefix => new Uri(SkosNamespaceText + local),
                XsdPrefix => new Uri(XsdNamespaceText + local),
                // Unknown prefix: the text may still be an absolute URI such as "https://...".
                _ => Uri.TryCreate(predicate, UriKind.Absolute, out var prefixedAbsolute)
                    ? prefixedAbsolute
                    : null,
            };
        }

        if (Uri.TryCreate(predicate, UriKind.Absolute, out var absolute))
        {
            return absolute;
        }

        return predicate.ToLowerInvariant() switch
        {
            MentionPredicateKey => SchemaMentionsUri,
            AboutPredicateKey => SchemaAboutUri,
            AuthorPredicateKey => SchemaAuthorUri,
            CreatorPredicateKey => SchemaCreatorUri,
            HasPartPredicateKey => SchemaHasPartUri,
            SameAsPredicateKey => SchemaSameAsUri,
            _ => null,
        };
    }

    // Names containing ':' are resolved like predicates, falling back to the schema "thing" type;
    // every other name is slugified into the schema namespace.
    private static Uri NormalizeTypeUri(string type)
    {
        if (type.Contains(':', StringComparison.Ordinal))
        {
            return ResolvePredicate(type) ?? SchemaThingTypeUri();
        }

        return new Uri(SchemaNamespaceText + KnowledgeNaming.Slugify(type));
    }

    private static Uri SchemaThingTypeUri()
    {
        return new Uri(SchemaNamespaceText + KnowledgeNaming.Slugify(SchemaThingTypeText));
    }

    // Cache key for typed literals: the lexical value plus the datatype's absolute URI text.
    private readonly record struct TypedLiteralKey(string Value, string DatatypeUri);
}

src/MarkdownLd.Kb/Graph/Build/KnowledgeGraphRules.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ public sealed record KnowledgeGraphBuildOptions
66

77
public bool IncludeFrontMatterRules { get; init; } = true;
88

9+
public bool IncludeAssertionReification { get; init; }
10+
911
public KnowledgeGraphSemanticLayerOptions SemanticLayers { get; init; } = KnowledgeGraphSemanticLayerOptions.Default;
1012

1113
public IReadOnlyList<KnowledgeGraphEntityRule> Entities { get; init; } = [];

src/MarkdownLd.Kb/Graph/Build/KnowledgeGraphSemanticLayerBuilder.cs

Lines changed: 111 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,19 @@ private void AddSkosLayer(
139139
AssertConceptScheme(graph, schemeNode, options.ConceptSchemeLabel, includeOntologyTypes);
140140

141141
var aboutLabels = CollectAboutLabels(documents);
142-
foreach (var conceptUri in CollectConceptUris(graph))
142+
var conceptUris = CollectConceptUris(graph);
143+
conceptUris.Remove(conceptSchemeUri);
144+
var conceptLabels = CollectConceptLabels(graph);
145+
var definedTerms = CollectTypedSubjects(graph, SchemaDefinedTermUri);
146+
var sameAsTargets = CollectUriTargets(graph, SchemaSameAsUri);
147+
148+
foreach (var conceptUri in conceptUris)
143149
{
144150
var conceptNode = graph.CreateUriNode(conceptUri);
145151
_ = new SkosConcept(conceptNode, graph);
146-
AssertConcept(graph, conceptNode, schemeNode, includeOntologyTypes && ShouldApplyOntologyConceptType(graph, conceptNode));
147-
AssertConceptLabels(graph, conceptNode, aboutLabels);
148-
MirrorSchemaSameAsAsSkosExactMatch(graph, conceptNode);
152+
AssertConcept(graph, conceptNode, schemeNode, includeOntologyTypes && ShouldApplyOntologyConceptType(conceptUri, definedTerms, conceptLabels));
153+
AssertConceptLabels(graph, conceptNode, conceptLabels, aboutLabels);
154+
MirrorSchemaSameAsAsSkosExactMatch(graph, conceptNode, sameAsTargets);
149155
}
150156
}
151157

@@ -195,10 +201,12 @@ private static void AssertConcept(
195201
graph.Assert(new Triple(conceptNode, graph.CreateUriNode(SkosTopConceptOfUri), schemeNode));
196202
}
197203

198-
private static bool ShouldApplyOntologyConceptType(Graph graph, IUriNode conceptNode)
204+
private static bool ShouldApplyOntologyConceptType(
205+
Uri conceptUri,
206+
IReadOnlySet<Uri> definedTerms,
207+
IReadOnlyDictionary<Uri, string> labels)
199208
{
200-
return HasType(graph, conceptNode, SchemaDefinedTermUri) ||
201-
TryGetLiteral(graph, conceptNode, SchemaNameUri) is not null;
209+
return definedTerms.Contains(conceptUri) || labels.ContainsKey(conceptUri);
202210
}
203211

204212
private static HashSet<Uri> CollectConceptUris(Graph graph)
@@ -227,13 +235,20 @@ private static void CollectTypedConceptUris(Graph graph, ISet<Uri> conceptUris)
227235
}
228236
}
229237

230-
private static bool HasType(Graph graph, IUriNode subject, Uri typeUri)
238+
private static HashSet<Uri> CollectTypedSubjects(Graph graph, Uri typeUri)
231239
{
232-
return graph.Triples.Any(triple => triple.Subject.Equals(subject) &&
233-
triple.Predicate is IUriNode predicateNode &&
234-
predicateNode.Uri == RdfTypeUri &&
235-
triple.Object is IUriNode objectNode &&
236-
objectNode.Uri == typeUri);
240+
var subjects = new HashSet<Uri>();
241+
foreach (var triple in graph.Triples.Where(static triple => triple.Predicate is IUriNode predicateNode && predicateNode.Uri == RdfTypeUri))
242+
{
243+
if (triple.Subject is IUriNode subjectNode &&
244+
triple.Object is IUriNode objectNode &&
245+
objectNode.Uri == typeUri)
246+
{
247+
subjects.Add(subjectNode.Uri);
248+
}
249+
}
250+
251+
return subjects;
237252
}
238253

239254
private static void CollectReferencedConceptUris(Graph graph, ISet<Uri> conceptUris, Uri predicateUri)
@@ -289,11 +304,13 @@ private static IEnumerable<string> ReadFrontMatterValues(IReadOnlyDictionary<str
289304
}
290305
}
291306

292-
private static void AssertConceptLabels(Graph graph, IUriNode conceptNode, IReadOnlyDictionary<Uri, string> aboutLabels)
307+
private static void AssertConceptLabels(
308+
Graph graph,
309+
IUriNode conceptNode,
310+
IReadOnlyDictionary<Uri, string> conceptLabels,
311+
IReadOnlyDictionary<Uri, string> aboutLabels)
293312
{
294-
var existingLabel = TryGetLiteral(graph, conceptNode, SchemaNameUri)
295-
?? TryGetLiteral(graph, conceptNode, SkosPrefLabelUri)
296-
?? TryGetLiteral(graph, conceptNode, RdfsLabelUri);
313+
var existingLabel = conceptLabels.GetValueOrDefault(conceptNode.Uri);
297314
if (existingLabel is not null)
298315
{
299316
graph.Assert(new Triple(conceptNode, graph.CreateUriNode(SkosPrefLabelUri), graph.CreateLiteralNode(existingLabel)));
@@ -308,25 +325,86 @@ private static void AssertConceptLabels(Graph graph, IUriNode conceptNode, IRead
308325
}
309326
}
310327

311-
private static string? TryGetLiteral(Graph graph, IUriNode subject, Uri predicateUri)
328+
private static Dictionary<Uri, string> CollectConceptLabels(Graph graph)
312329
{
313-
return graph.Triples
314-
.Where(triple => triple.Subject.Equals(subject) && triple.Predicate is IUriNode predicateNode && predicateNode.Uri == predicateUri && triple.Object is ILiteralNode)
315-
.Select(triple => ((ILiteralNode)triple.Object).Value)
316-
.FirstOrDefault();
330+
var labels = new Dictionary<Uri, LabelCandidate>();
331+
foreach (var triple in graph.Triples)
332+
{
333+
if (triple.Subject is not IUriNode subjectNode ||
334+
triple.Predicate is not IUriNode predicateNode ||
335+
triple.Object is not ILiteralNode literalNode)
336+
{
337+
continue;
338+
}
339+
340+
var priority = GetLabelPriority(predicateNode.Uri);
341+
if (priority == 0)
342+
{
343+
continue;
344+
}
345+
346+
var candidate = new LabelCandidate(literalNode.Value, priority);
347+
if (!labels.TryGetValue(subjectNode.Uri, out var existing) || candidate.Priority > existing.Priority)
348+
{
349+
labels[subjectNode.Uri] = candidate;
350+
}
351+
}
352+
353+
return labels.ToDictionary(static pair => pair.Key, static pair => pair.Value.Value);
317354
}
318355

319-
private static void MirrorSchemaSameAsAsSkosExactMatch(Graph graph, IUriNode conceptNode)
356+
private static int GetLabelPriority(Uri predicateUri)
320357
{
321-
var sameAsTriples = graph.Triples
322-
.Where(triple => triple.Subject.Equals(conceptNode) &&
323-
triple.Predicate is IUriNode predicateNode &&
324-
predicateNode.Uri == SchemaSameAsUri &&
325-
triple.Object is IUriNode)
326-
.ToArray();
327-
foreach (var triple in sameAsTriples)
358+
if (predicateUri == SchemaNameUri)
359+
{
360+
return 3;
361+
}
362+
363+
if (predicateUri == SkosPrefLabelUri)
364+
{
365+
return 2;
366+
}
367+
368+
return predicateUri == RdfsLabelUri ? 1 : 0;
369+
}
370+
371+
/// <summary>
/// Groups the URI-node objects of every <paramref name="predicateUri"/> triple by subject URI.
/// </summary>
/// <param name="graph">Graph whose triples are scanned once.</param>
/// <param name="predicateUri">Predicate to collect; triples with any other predicate are skipped.</param>
/// <returns>
/// A dictionary from subject URI to the URI nodes it points at through <paramref name="predicateUri"/>.
/// Keys are compared by their full absolute text, so IRIs that differ only in fragment stay separate.
/// </returns>
private static Dictionary<Uri, IReadOnlyList<IUriNode>> CollectUriTargets(Graph graph, Uri predicateUri)
{
    // System.Uri equality ignores the fragment, which would merge hash IRIs such as ".../doc#a" and
    // ".../doc#b" into one bucket; compare the absolute text instead.
    var comparer = AbsoluteUriTextComparer.Instance;
    var grouped = new Dictionary<Uri, List<IUriNode>>(comparer);

    foreach (var triple in graph.Triples)
    {
        if (triple.Predicate is not IUriNode predicateNode ||
            !comparer.Equals(predicateNode.Uri, predicateUri) ||
            triple.Subject is not IUriNode subjectNode ||
            triple.Object is not IUriNode objectNode)
        {
            continue;
        }

        if (!grouped.TryGetValue(subjectNode.Uri, out var subjectTargets))
        {
            subjectTargets = [];
            grouped[subjectNode.Uri] = subjectTargets;
        }

        subjectTargets.Add(objectNode);
    }

    // Copy into the read-only-valued shape while keeping the same comparer for caller lookups.
    var result = new Dictionary<Uri, IReadOnlyList<IUriNode>>(grouped.Count, comparer);
    foreach (var (subjectUri, subjectTargets) in grouped)
    {
        result[subjectUri] = subjectTargets;
    }

    return result;
}

// Compares URIs by their complete absolute text (fragment included), unlike the default Uri equality.
private sealed class AbsoluteUriTextComparer : IEqualityComparer<Uri>
{
    public static readonly AbsoluteUriTextComparer Instance = new();

    public bool Equals(Uri? x, Uri? y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        return x is not null &&
               y is not null &&
               string.Equals(x.AbsoluteUri, y.AbsoluteUri, StringComparison.Ordinal);
    }

    public int GetHashCode(Uri obj)
    {
        return StringComparer.Ordinal.GetHashCode(obj.AbsoluteUri);
    }
}
394+
395+
private static void MirrorSchemaSameAsAsSkosExactMatch(
396+
Graph graph,
397+
IUriNode conceptNode,
398+
IReadOnlyDictionary<Uri, IReadOnlyList<IUriNode>> sameAsTargets)
399+
{
400+
if (!sameAsTargets.TryGetValue(conceptNode.Uri, out var targets))
401+
{
402+
return;
403+
}
404+
405+
foreach (var target in targets)
328406
{
329-
graph.Assert(new Triple(conceptNode, graph.CreateUriNode(SkosExactMatchUri), triple.Object));
407+
graph.Assert(new Triple(conceptNode, graph.CreateUriNode(SkosExactMatchUri), target));
330408
}
331409
}
332410

@@ -348,4 +426,6 @@ private static string GetResourceLabel(Uri resourceUri)
348426
? resourceUri.AbsoluteUri
349427
: lastSegment;
350428
}
429+
430+
private readonly record struct LabelCandidate(string Value, int Priority);
351431
}

0 commit comments

Comments
 (0)