Skip to content

Commit 6aa31e2

Browse files
committed
more graph stuff
1 parent 71a384c commit 6aa31e2

9 files changed

Lines changed: 791 additions & 239 deletions

docs/Features/CapabilityGraphRules.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ Rule values can be strings or maps. Strings become node labels. Maps can use `id
4242
- primary matches from graph metadata search when no token index is present
4343
- related matches from direct `kb:relatedTo` edges and shared `kb:memberOf` groups
4444
- next-step matches from direct `kb:nextStep` edges
45-
- a bounded focused graph snapshot containing the selected neighborhood
45+
- a bounded focused graph snapshot containing selected matches plus explanatory group nodes
46+
47+
## Diagnostics
48+
49+
Malformed caller-authored rule entries are skipped, and each skipped entry is reported as a caller-visible build diagnostic. The pipeline reports invalid shapes, missing predicates, unsupported predicates, missing objects, and blank node references in `MarkdownKnowledgeBuildResult.Diagnostics` rather than dropping the affected entries silently.
4650

4751
## Test Matrix
4852

@@ -52,7 +56,8 @@ Rule values can be strings or maps. Strings become node labels. Maps can use `id
5256
| Focused search | Returns a small primary set before related or next-step candidates |
5357
| Related expansion | Includes same-group and explicit related nodes |
5458
| Next-step expansion | Includes explicit `kb:nextStep` nodes |
55-
| Focused export | Mermaid/DOT export includes only selected graph neighborhood |
59+
| Malformed rules | Reports skipped rule entries in build diagnostics |
60+
| Focused export | Mermaid/DOT export includes only the selected matches and their explanatory group nodes |
5661

5762
## Verification
5863

src/MarkdownLd.Kb/Pipeline/KnowledgeGraph.FocusedSearch.cs

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -175,18 +175,16 @@ private static KnowledgeGraphSnapshot BuildFocusedGraph(
175175
IReadOnlyList<KnowledgeGraphFocusedSearchMatch> related,
176176
IReadOnlyList<KnowledgeGraphFocusedSearchMatch> nextSteps)
177177
{
178-
var includedNodeIds = primary
178+
var selectedMatchIds = primary
179179
.Concat(related)
180180
.Concat(nextSteps)
181181
.Select(static match => match.NodeId)
182182
.ToHashSet(StringComparer.Ordinal);
183-
var includedEdges = SelectFocusedEdges(snapshot, includedNodeIds).ToArray();
184-
185-
foreach (var edge in includedEdges)
186-
{
187-
includedNodeIds.Add(edge.SubjectId);
188-
includedNodeIds.Add(edge.ObjectId);
189-
}
183+
var explanatoryGroupIds = SelectExplanatoryGroupIds(snapshot, selectedMatchIds);
184+
var includedNodeIds = selectedMatchIds
185+
.Concat(explanatoryGroupIds)
186+
.ToHashSet(StringComparer.Ordinal);
187+
var includedEdges = SelectFocusedEdges(snapshot, selectedMatchIds, explanatoryGroupIds).ToArray();
190188

191189
return new KnowledgeGraphSnapshot(
192190
snapshot.Nodes
@@ -196,29 +194,33 @@ private static KnowledgeGraphSnapshot BuildFocusedGraph(
196194
includedEdges);
197195
}
198196

199-
private static IEnumerable<KnowledgeGraphEdge> SelectFocusedEdges(
197+
private static IReadOnlySet<string> SelectExplanatoryGroupIds(
200198
KnowledgeGraphSnapshot snapshot,
201-
IReadOnlySet<string> selectedNodeIds)
199+
IReadOnlySet<string> selectedMatchIds)
202200
{
203-
var selectedGroupIds = snapshot.Edges
204-
.Where(edge => selectedNodeIds.Contains(edge.SubjectId) && edge.PredicateLabel == KbMemberOf)
201+
return snapshot.Edges
202+
.Where(edge => selectedMatchIds.Contains(edge.SubjectId) && edge.PredicateLabel == KbMemberOf)
205203
.Select(static edge => edge.ObjectId)
206204
.ToHashSet(StringComparer.Ordinal);
205+
}
207206

207+
private static IEnumerable<KnowledgeGraphEdge> SelectFocusedEdges(
208+
KnowledgeGraphSnapshot snapshot,
209+
IReadOnlySet<string> selectedMatchIds,
210+
IReadOnlySet<string> explanatoryGroupIds)
211+
{
208212
return snapshot.Edges
209213
.Where(edge =>
210-
(selectedNodeIds.Contains(edge.SubjectId) && selectedNodeIds.Contains(edge.ObjectId)) ||
211-
(selectedNodeIds.Contains(edge.SubjectId) && selectedGroupIds.Contains(edge.ObjectId)) ||
212-
(selectedNodeIds.Contains(edge.SubjectId) && IsFocusedPredicate(edge.PredicateLabel)))
214+
(selectedMatchIds.Contains(edge.SubjectId) && selectedMatchIds.Contains(edge.ObjectId)) ||
215+
(selectedMatchIds.Contains(edge.SubjectId) &&
216+
explanatoryGroupIds.Contains(edge.ObjectId) &&
217+
edge.PredicateLabel == KbMemberOf))
213218
.OrderBy(static edge => edge.SubjectId, StringComparer.Ordinal)
214219
.ThenBy(static edge => edge.PredicateId, StringComparer.Ordinal)
215220
.ThenBy(static edge => edge.ObjectId, StringComparer.Ordinal)
216221
.ToArray();
217222
}
218223

219-
private static bool IsFocusedPredicate(string predicateLabel)
220-
=> predicateLabel is KbMemberOf or KbRelatedTo or KbNextStep;
221-
222224
private static bool IsArticleNode(KnowledgeGraphSnapshot snapshot, string nodeId)
223225
=> snapshot.Edges.Any(edge =>
224226
edge.SubjectId == nodeId &&
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
namespace ManagedCode.MarkdownLd.Kb.Pipeline;
2+
3+
internal sealed record KnowledgeGraphRuleExtractionResult(
4+
KnowledgeExtractionResult Facts,
5+
IReadOnlyList<string> Diagnostics);
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
using System.Diagnostics.CodeAnalysis;
2+
using static ManagedCode.MarkdownLd.Kb.Pipeline.PipelineConstants;
3+
4+
namespace ManagedCode.MarkdownLd.Kb.Pipeline;
5+
6+
internal sealed partial class KnowledgeGraphRuleExtractor
7+
{
8+
private string? ReadMapNodeId(
9+
IReadOnlyDictionary<string, object?> map,
10+
MarkdownDocument document,
11+
params string[] keys)
12+
{
13+
foreach (var key in keys)
14+
{
15+
if (TryReadString(map, key, out var value))
16+
{
17+
return ResolveNodeId(document, value);
18+
}
19+
}
20+
21+
return null;
22+
}
23+
24+
private bool TryReadNodeReference(
25+
object? item,
26+
MarkdownDocument document,
27+
[NotNullWhen(true)] out GraphNodeReference? node)
28+
{
29+
if (item is IReadOnlyDictionary<string, object?> map)
30+
{
31+
return TryReadMapNodeReference(map, document, out node);
32+
}
33+
34+
var text = item?.ToString()?.Trim();
35+
if (string.IsNullOrWhiteSpace(text))
36+
{
37+
node = null;
38+
return false;
39+
}
40+
41+
node = new GraphNodeReference(
42+
ResolveNodeId(document, text),
43+
text,
44+
null,
45+
[],
46+
FullConfidence,
47+
!IsExternalIdentifier(text));
48+
return true;
49+
}
50+
51+
private bool TryReadMapNodeReference(
52+
IReadOnlyDictionary<string, object?> map,
53+
MarkdownDocument document,
54+
[NotNullWhen(true)] out GraphNodeReference? node)
55+
{
56+
var label = ReadFirstString(map, LabelKey, NameKey, ValueKey, TargetKey, ObjectKey);
57+
var idText = ReadFirstString(map, IdKey, TargetIdKey, TargetIdSnakeKey, ObjectIdKey, ObjectIdSnakeKey);
58+
if (string.IsNullOrWhiteSpace(label))
59+
{
60+
label = idText;
61+
}
62+
63+
if (string.IsNullOrWhiteSpace(label) && string.IsNullOrWhiteSpace(idText))
64+
{
65+
node = null;
66+
return false;
67+
}
68+
69+
var type = ReadFirstString(map, TypeKey);
70+
var sameAs = ReadStringList(map, SameAsKey, SameAsSnakeKey).ToList();
71+
node = new GraphNodeReference(
72+
string.IsNullOrWhiteSpace(idText)
73+
? ResolveNodeId(document, label!)
74+
: ResolveNodeId(document, idText),
75+
label ?? idText!,
76+
type,
77+
sameAs,
78+
FullConfidence,
79+
ShouldAddEntity(label, idText, type, sameAs));
80+
return true;
81+
}
82+
83+
private string ResolveNodeId(MarkdownDocument document, string? value)
84+
{
85+
var text = value?.Trim();
86+
if (string.IsNullOrWhiteSpace(text) ||
87+
text.Equals(ArticleMarker, StringComparison.OrdinalIgnoreCase) ||
88+
text.Equals(ThisArticleMarker, StringComparison.OrdinalIgnoreCase) ||
89+
text.Equals(DefaultDocument, StringComparison.OrdinalIgnoreCase))
90+
{
91+
return document.DocumentUri.AbsoluteUri;
92+
}
93+
94+
if (Uri.TryCreate(text, UriKind.Absolute, out var absolute))
95+
{
96+
return absolute.AbsoluteUri;
97+
}
98+
99+
if (text.StartsWith(UriSchemePrefix, StringComparison.OrdinalIgnoreCase))
100+
{
101+
return text;
102+
}
103+
104+
return KnowledgeNaming.CreateEntityId(_baseUri, text);
105+
}
106+
107+
private static IEnumerable<GraphRuleFrontMatterItem> ReadFrontMatterItems(
108+
IReadOnlyDictionary<string, object?> frontMatter,
109+
params string[] keys)
110+
{
111+
foreach (var key in keys)
112+
{
113+
if (!frontMatter.TryGetValue(key, out var raw))
114+
{
115+
continue;
116+
}
117+
118+
var index = 0;
119+
if (raw is IEnumerable<object?> list && raw is not string)
120+
{
121+
foreach (var item in list)
122+
{
123+
yield return new GraphRuleFrontMatterItem(key, index, item);
124+
index++;
125+
}
126+
}
127+
else
128+
{
129+
yield return new GraphRuleFrontMatterItem(key, index, raw);
130+
}
131+
}
132+
}
133+
134+
private static string? ReadFirstString(IReadOnlyDictionary<string, object?> map, params string[] keys)
135+
{
136+
foreach (var key in keys)
137+
{
138+
if (TryReadString(map, key, out var value))
139+
{
140+
return value;
141+
}
142+
}
143+
144+
return null;
145+
}
146+
147+
private static IEnumerable<string> ReadStringList(
148+
IReadOnlyDictionary<string, object?> map,
149+
params string[] keys)
150+
{
151+
foreach (var key in keys)
152+
{
153+
if (!map.TryGetValue(key, out var raw))
154+
{
155+
continue;
156+
}
157+
158+
if (raw is IEnumerable<object?> list && raw is not string)
159+
{
160+
foreach (var item in list)
161+
{
162+
var text = item?.ToString()?.Trim();
163+
if (!string.IsNullOrWhiteSpace(text))
164+
{
165+
yield return text;
166+
}
167+
}
168+
}
169+
else if (!string.IsNullOrWhiteSpace(raw?.ToString()))
170+
{
171+
yield return raw.ToString()!.Trim();
172+
}
173+
}
174+
}
175+
176+
private static bool TryReadString(
177+
IReadOnlyDictionary<string, object?> map,
178+
string key,
179+
[NotNullWhen(true)] out string? value)
180+
{
181+
if (map.TryGetValue(key, out var raw) && !string.IsNullOrWhiteSpace(raw?.ToString()))
182+
{
183+
value = raw.ToString()!.Trim();
184+
return true;
185+
}
186+
187+
value = null;
188+
return false;
189+
}
190+
191+
private static bool ShouldAddEntity(
192+
string? label,
193+
string? idText,
194+
string? type,
195+
IReadOnlyCollection<string> sameAs)
196+
{
197+
if (!string.IsNullOrWhiteSpace(type) || sameAs.Count > 0)
198+
{
199+
return true;
200+
}
201+
202+
if (!string.IsNullOrWhiteSpace(idText))
203+
{
204+
return !string.Equals(label, idText, StringComparison.OrdinalIgnoreCase);
205+
}
206+
207+
return !IsExternalIdentifier(label);
208+
}
209+
210+
private static bool IsExternalIdentifier(string? value)
211+
{
212+
return !string.IsNullOrWhiteSpace(value) &&
213+
Uri.TryCreate(value.Trim(), UriKind.Absolute, out _);
214+
}
215+
216+
private sealed record GraphRuleFrontMatterItem(
217+
string RuleName,
218+
int Index,
219+
object? Value);
220+
221+
private sealed record GraphNodeReference(
222+
string Id,
223+
string Label,
224+
string? Type,
225+
List<string> SameAs,
226+
double Confidence,
227+
bool ShouldAddEntity);
228+
}

0 commit comments

Comments
 (0)