Skip to content

Commit b62cde3

Browse files
committed
tests
1 parent 91fcd8c commit b62cde3

21 files changed

+617
-175
lines changed

AGENTS.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ Target capabilities:
6262
- Tests must verify the real Markdown -> graph -> query/search flow, including success, malformed input, and empty/no-match paths where relevant.
6363
- Tests must not use mocks, stubs, or fakes, except one local test implementation of `Microsoft.Extensions.AI.IChatClient` used to prove the LLM extraction boundary without network access.
6464
- Use TUnit for tests and Shouldly for assertions.
65-
- Fallbacks are forbidden. Do not silently substitute generic/default behaviour when parsing, extraction, graph building, query execution, or AI extraction fails; fail explicitly or skip the invalid fact with a caller-visible test.
66-
- Legacy or leftover old code is forbidden. Do not keep deprecated duplicate paths, compatibility shims, or unused old implementations; remove them immediately unless an ADR documents a temporary migration path.
65+
- Silent substitution paths are forbidden. Do not replace parsing, extraction, graph building, query execution, or AI extraction failures with generic/default behaviour; fail explicitly or skip the invalid fact with a caller-visible test.
66+
- Old leftover code is forbidden. Do not keep deprecated duplicate paths, compatibility shims, or unused old implementations; remove them immediately unless an ADR documents a temporary migration path.
6767

6868
## Global Skills
6969

@@ -87,9 +87,9 @@ List only the skills this solution actually uses.
8787

8888
- `restore`: `dotnet restore MarkdownLd.Kb.slnx`
8989
- `build`: `dotnet build MarkdownLd.Kb.slnx --no-restore`
90-
- `test`: `dotnet test MarkdownLd.Kb.slnx --configuration Release`
90+
- `test`: `dotnet test --solution MarkdownLd.Kb.slnx --configuration Release`
9191
- `format`: `dotnet format MarkdownLd.Kb.slnx --verify-no-changes`
92-
- `coverage`: `dotnet test MarkdownLd.Kb.slnx --configuration Release -- --coverage --coverage-output-format cobertura --coverage-output "$PWD/TestResults/TUnitCoverage/coverage.cobertura.xml" --coverage-settings "$PWD/CodeCoverage.runsettings"`
92+
- `coverage`: `dotnet test --solution MarkdownLd.Kb.slnx --configuration Release -- --coverage --coverage-output-format cobertura --coverage-output "$PWD/TestResults/TUnitCoverage/coverage.cobertura.xml" --coverage-settings "$PWD/CodeCoverage.runsettings"`
9393

9494
`.NET` runner policy:
9595

CodeCoverage.runsettings

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<RunSettings>
3+
<DataCollectionRunSettings>
4+
<DataCollectors>
5+
<DataCollector friendlyName="Code Coverage" uri="datacollector://Microsoft/CodeCoverage/2.0">
6+
<Configuration>
7+
<CodeCoverage>
8+
<ModulePaths>
9+
<Include>
10+
<ModulePath>.*ManagedCode\.MarkdownLd\.Kb\.dll$</ModulePath>
11+
</Include>
12+
<Exclude>
13+
<ModulePath>.*ManagedCode\.MarkdownLd\.Kb\.Tests\.dll$</ModulePath>
14+
</Exclude>
15+
</ModulePaths>
16+
<Sources>
17+
<Exclude>
18+
<Source>.*[\\/]obj[\\/].*</Source>
19+
</Exclude>
20+
</Sources>
21+
<UseVerifiableInstrumentation>True</UseVerifiableInstrumentation>
22+
<AllowLowIntegrityProcesses>True</AllowLowIntegrityProcesses>
23+
<CollectFromChildProcesses>True</CollectFromChildProcesses>
24+
<CollectAspDotNet>False</CollectAspDotNet>
25+
</CodeCoverage>
26+
</Configuration>
27+
</DataCollector>
28+
</DataCollectors>
29+
</DataCollectionRunSettings>
30+
</RunSettings>

README.md

Lines changed: 116 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,51 @@
11
# Markdown-LD Knowledge Bank
22

3-
Markdown-LD Knowledge Bank is a .NET 10 library for turning Markdown knowledge-base files into an in-memory RDF graph that can be searched and queried with read-only SPARQL.
3+
Markdown-LD Knowledge Bank is a .NET 10 library for turning Markdown knowledge-base files into an in-memory RDF graph that can be searched, queried with read-only SPARQL, exported as RDF, and rendered as a diagram.
44

55
It ports the core idea from [lqdev/markdown-ld-kb](https://github.com/lqdev/markdown-ld-kb) into a C# library package. The runtime is local and in-memory: no localhost server, no Azure Functions host, no database server, and no hosted graph service are required.
66

7+
Use it when you want plain Markdown notes to become a queryable knowledge graph without making your application depend on a specific model provider, graph server, or hosted indexing service.
8+
79
## What It Does
810

911
```mermaid
1012
flowchart LR
11-
Source["Markdown, MDX, text, JSON, YAML, CSV"] --> Converter["KnowledgeSourceDocumentConverter"]
12-
Converter --> Parser["MarkdownDocumentParser"]
13-
Parser --> Deterministic["Deterministic fact extraction"]
14-
Parser --> Chat["Optional IChatClient extraction"]
15-
Deterministic --> Merge["Fact merge and canonicalization"]
13+
Source["Markdown / MDX / text\nJSON / YAML / CSV"] --> Converter["KnowledgeSourceDocumentConverter"]
14+
Converter --> Parser["MarkdownDocumentParser\n→ MarkdownDocument"]
15+
Parser --> Det["DeterministicKnowledgeFactExtractor\n→ entities, assertions"]
16+
Parser --> Chat["ChatClientKnowledgeFactExtractor\n(optional IChatClient)"]
17+
Det --> Merge["KnowledgeFactMerger\n→ merged KnowledgeExtractionResult"]
1618
Chat --> Merge
17-
Merge --> Graph["dotNetRDF in-memory graph"]
18-
Graph --> Search["SearchAsync"]
19-
Graph --> Sparql["SELECT and ASK SPARQL"]
20-
Graph --> Export["Turtle and JSON-LD"]
19+
Merge --> Builder["KnowledgeGraphBuilder\n→ dotNetRDF in-memory graph"]
20+
Builder --> Search["SearchAsync"]
21+
Builder --> Sparql["ExecuteSelectAsync\nExecuteAskAsync"]
22+
Builder --> Snap["ToSnapshot"]
23+
Builder --> Diagram["SerializeMermaidFlowchart\nSerializeDotGraph"]
24+
Builder --> Export["SerializeTurtle\nSerializeJsonLd"]
2125
```
2226

23-
The pipeline extracts:
27+
**Deterministic extraction** produces facts without any network call:
2428

2529
- article identity, title, summary, dates, tags, authors, and topics from YAML front matter
26-
- heading sections and document identity from Markdown
30+
- heading sections and document identity from Markdown structure
2731
- Markdown links such as `[SPARQL](https://www.w3.org/TR/sparql11-query/)`
2832
- optional wikilinks such as `[[RDF]]`
2933
- optional assertion arrows such as `article --mentions--> RDF`
30-
- optional LLM-produced entities and assertions through `Microsoft.Extensions.AI.IChatClient`
34+
35+
**Optional AI extraction** enriches the graph with LLM-produced entities and assertions through `Microsoft.Extensions.AI.IChatClient`. No provider-specific SDK is required in the core library.
36+
37+
**Graph outputs:**
38+
39+
- `ToSnapshot()` — stable `KnowledgeGraphSnapshot` with `Nodes` and `Edges`
40+
- `SerializeMermaidFlowchart()` — Mermaid `graph LR` diagram
41+
- `SerializeDotGraph()` — Graphviz DOT diagram
42+
- `SerializeTurtle()` — Turtle RDF serialization
43+
- `SerializeJsonLd()` — JSON-LD serialization
44+
- `ExecuteSelectAsync(sparql)` — read-only SPARQL SELECT returning `SparqlQueryResult`
45+
- `ExecuteAskAsync(sparql)` — read-only SPARQL ASK returning `bool`
46+
- `SearchAsync(term)` — case-insensitive search across `schema:name`, `schema:description`, and `schema:keywords`, returning matching graph subjects as `SparqlQueryResult`
47+
48+
All async methods accept an optional `CancellationToken`.
3149

3250
## Install
3351

@@ -122,11 +140,11 @@ internal static class FileGraphDemo
122140
}
123141
```
124142

125-
`KnowledgeSourceDocumentConverter` supports Markdown and other text-like knowledge inputs: `.md`, `.markdown`, `.mdx`, `.txt`, `.text`, `.log`, `.csv`, `.json`, `.jsonl`, `.yaml`, and `.yml`.
143+
`KnowledgeSourceDocumentConverter` supports Markdown and other text-like knowledge inputs: `.md`, `.markdown`, `.mdx`, `.txt`, `.text`, `.log`, `.csv`, `.json`, `.jsonl`, `.yaml`, and `.yml`. Non-Markdown files are accepted as text sources and run through the same parsing, extraction, and graph build pipeline.
126144

127145
You do not need to pass a base URI for normal use. Document identity is resolved in this order:
128146

129-
- `canonicalUrl` or `canonical_url` in Markdown front matter
147+
- `KnowledgeDocumentConversionOptions.CanonicalUri` when you provide one
130148
- the file path, normalized the same way as the upstream project: `content/notes/rdf.md` becomes a stable document IRI
131149
- the generated inline document path when `BuildFromMarkdownAsync` is called without a path
132150

@@ -179,12 +197,12 @@ ASK WHERE {
179197

180198
The built-in chat extractor requests structured output through `GetResponseAsync<T>()`, normalizes the returned entity/assertion payload, merges it with deterministic facts, and then builds the same in-memory RDF graph used by search and SPARQL. Tests use one local non-network `IChatClient` implementation so the full extraction-to-graph flow is covered without a live model.
181199

182-
## Query And Export
200+
## Query The Graph
183201

184202
```csharp
185203
using ManagedCode.MarkdownLd.Kb.Pipeline;
186204

187-
internal static class QueryDemo
205+
internal static class QueryGraphDemo
188206
{
189207
private const string SelectQuery = """
190208
PREFIX schema: <https://schema.org/>
@@ -204,28 +222,71 @@ LIMIT 100
204222
public static async Task RunAsync(MarkdownKnowledgeBuildResult result)
205223
{
206224
var rows = await result.Graph.ExecuteSelectAsync(SelectQuery);
225+
var search = await result.Graph.SearchAsync(SearchTerm);
226+
207227
foreach (var row in rows.Rows)
208228
{
209229
Console.WriteLine(row.Values[ArticleKey]);
210230
Console.WriteLine(row.Values[TitleKey]);
211231
}
212232

213-
var search = await result.Graph.SearchAsync(SearchTerm);
214-
var turtle = result.Graph.SerializeTurtle();
215-
var jsonLd = result.Graph.SerializeJsonLd();
216-
217233
Console.WriteLine(search.Rows.Count);
234+
}
235+
}
236+
```
237+
238+
SPARQL execution is intentionally read-only. `SELECT` and `ASK` are allowed; mutation forms such as `INSERT`, `DELETE`, `LOAD`, `CLEAR`, `DROP`, and `CREATE` are rejected before execution.
239+
240+
## Export The Graph
241+
242+
```csharp
243+
using ManagedCode.MarkdownLd.Kb.Pipeline;
244+
245+
internal static class ExportGraphDemo
246+
{
247+
public static void Run(MarkdownKnowledgeBuildResult result)
248+
{
249+
KnowledgeGraphSnapshot snapshot = result.Graph.ToSnapshot();
250+
string mermaid = result.Graph.SerializeMermaidFlowchart();
251+
string dot = result.Graph.SerializeDotGraph();
252+
string turtle = result.Graph.SerializeTurtle();
253+
string jsonLd = result.Graph.SerializeJsonLd();
254+
255+
Console.WriteLine(snapshot.Nodes.Count);
256+
Console.WriteLine(snapshot.Edges.Count);
257+
Console.WriteLine(mermaid);
258+
Console.WriteLine(dot);
218259
Console.WriteLine(turtle.Length);
219260
Console.WriteLine(jsonLd.Length);
220261
}
221262
}
222263
```
223264

224-
SPARQL execution is intentionally read-only. `SELECT` and `ASK` are allowed; mutation forms such as `INSERT`, `DELETE`, `LOAD`, `CLEAR`, `DROP`, and `CREATE` are rejected before execution.
265+
`ToSnapshot()` returns a stable object graph with `Nodes` and `Edges` so callers can build their own UI, JSON endpoint, or visualization layer without touching dotNetRDF internals. URI node labels are resolved from `schema:name` when available, so diagram output is readable by default.
266+
267+
Example Mermaid output shape:
268+
269+
```mermaid
270+
graph LR
271+
n0["Zero Cost Knowledge Graph"]
272+
n1["RDF"]
273+
n0 -->|"schema:mentions"| n1
274+
```
275+
276+
Example DOT output shape:
277+
278+
```dot
279+
digraph KnowledgeGraph {
280+
rankdir=LR;
281+
"n0" [label="Zero Cost Knowledge Graph"];
282+
"n1" [label="RDF"];
283+
"n0" -> "n1" [label="schema:mentions"];
284+
}
285+
```
225286

226287
## Thread Safety
227288

228-
`KnowledgeGraph` is safe for shared in-memory read/write use through its public API. Search, read-only SPARQL, and serialization run under a read lock; `MergeAsync` snapshots a built graph and merges it under a write lock.
289+
`KnowledgeGraph` is safe for shared in-memory read/write use through its public API. Search, read-only SPARQL, snapshot export, diagram serialization, and RDF serialization run under a read lock; `MergeAsync` snapshots a built graph and merges it under a write lock.
229290

230291
Use this when many workers convert Markdown independently and publish their results into one graph:
231292

@@ -237,6 +298,21 @@ await shared.Graph.MergeAsync(next.Graph);
237298
var rows = await shared.Graph.SearchAsync("rdf");
238299
```
239300

301+
## Key Types
302+
303+
| Type | Purpose |
304+
|---|---|
305+
| `MarkdownKnowledgePipeline` | Entry point. Orchestrates parsing, extraction, merge, and graph build. |
306+
| `MarkdownKnowledgeBuildResult` | Holds `Documents`, `Facts`, and the built `Graph`. |
307+
| `KnowledgeGraph` | In-memory dotNetRDF graph with query, search, export, and merge. |
308+
| `KnowledgeGraphSnapshot` | Immutable view with `Nodes` (`KnowledgeGraphNode`) and `Edges` (`KnowledgeGraphEdge`). |
309+
| `MarkdownDocument` | Parsed document produced by the pipeline: `FrontMatter`, `Body`, and `Sections`. |
310+
| `MarkdownFrontMatter` | Typed access to YAML front matter fields. |
311+
| `KnowledgeExtractionResult` | Merged collection of `KnowledgeEntityFact` and `KnowledgeAssertionFact`. |
312+
| `SparqlQueryResult` | Query result with `Variables` and `Rows` of `SparqlRow`. |
313+
| `KnowledgeSourceDocumentConverter` | Converts files and directories into pipeline-ready source documents. |
314+
| `ChatClientKnowledgeFactExtractor` | AI extraction adapter behind `IChatClient`. |
315+
240316
## Markdown Conventions
241317

242318
```markdown
@@ -257,6 +333,20 @@ about:
257333
Use [RDF](https://www.w3.org/RDF/) and [SPARQL](https://www.w3.org/TR/sparql11-query/).
258334
```
259335

336+
Recognized front matter keys:
337+
338+
| Key | RDF property | Type |
339+
|---|---|---|
340+
| `title` | `schema:name` | string |
341+
| `description` / `summary` | `schema:description` | string |
342+
| `datePublished` | `schema:datePublished` | string (ISO date) |
343+
| `dateModified` | `schema:dateModified` | string (ISO date) |
344+
| `author` | `schema:author` | string or list |
345+
| `tags` / `keywords` | `schema:keywords` | list |
346+
| `about` | `schema:about` | list |
347+
| `canonicalUrl` / `canonical_url` | document identity in the root parser only; for pipeline identity, set `KnowledgeDocumentConversionOptions.CanonicalUri` | string (URL) |
348+
| `entity_hints` / `entityHints` | entity hints | list of `{label, type, sameAs}` |
349+
260350
Optional advanced predicate forms:
261351

262352
- `mentions` becomes `schema:mentions`
@@ -303,8 +393,8 @@ dotnet test --solution MarkdownLd.Kb.slnx --configuration Release -- --coverage
303393

304394
Current verification baseline:
305395

306-
- tests: 69 passed, 0 failed
307-
- line coverage: 96.06%
308-
- branch coverage: 85.22%
396+
- tests: 70 passed, 0 failed
397+
- line coverage: 95.93%
398+
- branch coverage: 84.55%
309399
- target framework: .NET 10
310400
- package version: 0.0.1

docs/ADR/ADR-0001-rdf-sparql-library.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ Mitigations:
164164
### Test commands
165165

166166
- build: `dotnet build MarkdownLd.Kb.slnx --no-restore`
167-
- test: `dotnet test MarkdownLd.Kb.slnx --configuration Release`
167+
- test: `dotnet test --solution MarkdownLd.Kb.slnx --configuration Release`
168168
- format: `dotnet format MarkdownLd.Kb.slnx --verify-no-changes`
169169
- coverage: `dotnet test --solution MarkdownLd.Kb.slnx --configuration Release -- --coverage --coverage-output-format cobertura --coverage-output "$PWD/TestResults/TUnitCoverage/coverage.cobertura.xml" --coverage-settings "$PWD/CodeCoverage.runsettings"`
170170

docs/ADR/ADR-0002-llm-extraction-ichatclient.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Mitigations:
112112
### Test commands
113113

114114
- build: `dotnet build MarkdownLd.Kb.slnx --no-restore`
115-
- test: `dotnet test MarkdownLd.Kb.slnx --configuration Release`
115+
- test: `dotnet test --solution MarkdownLd.Kb.slnx --configuration Release`
116116
- format: `dotnet format MarkdownLd.Kb.slnx --verify-no-changes`
117117
- coverage: `dotnet test --solution MarkdownLd.Kb.slnx --configuration Release -- --coverage --coverage-output-format cobertura --coverage-output "$PWD/TestResults/TUnitCoverage/coverage.cobertura.xml" --coverage-settings "$PWD/CodeCoverage.runsettings"`
118118

docs/Architecture.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ flowchart LR
9090
Merge --> WriteLock["write lock"]
9191
Search["SearchAsync"] --> ReadLock["read lock"]
9292
Select["ExecuteSelectAsync / ExecuteAskAsync"] --> ReadLock
93+
Snapshot["ToSnapshot"] --> ReadLock
94+
Diagram["SerializeMermaidFlowchart / SerializeDotGraph"] --> ReadLock
9395
Serialize["SerializeTurtle / SerializeJsonLd"] --> ReadLock
9496
WriteLock --> DotNetRdf["dotNetRDF Graph"]
9597
ReadLock --> DotNetRdf

src/MarkdownLd.Kb/AGENTS.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ Purpose: Production library for Markdown-LD Knowledge Bank.
1919
- Do not add localhost, HTTP server, background service, database server, or hosted API dependencies. The production pipeline must build, store, query, and serialize graphs in memory.
2020
- Do not add embedding/vector provider dependencies to the core pipeline. Future semantic/vector search must be optional and provider-neutral.
2121
- Keep the root namespace, assembly name, and package ID as `ManagedCode.MarkdownLd.Kb`.
22-
- Do not implement fallbacks. Invalid parsing, extraction, graph, or query inputs must fail explicitly or be skipped by a documented validation rule with tests.
23-
- Do not keep legacy or leftover implementation paths in this project. Remove obsolete code instead of wrapping or preserving it.
22+
- Do not implement silent substitution paths. Invalid parsing, extraction, graph, or query inputs must fail explicitly or be skipped by a documented validation rule with tests.
23+
- Do not keep old leftover implementation paths in this project. Remove obsolete code instead of wrapping or preserving it.
2424

2525
## Commands
2626

2727
- build: `dotnet build ../../MarkdownLd.Kb.slnx --no-restore`
28-
- test: `(cd ../.. && dotnet test MarkdownLd.Kb.slnx --configuration Release)`
29-
- coverage: `(cd ../.. && dotnet test MarkdownLd.Kb.slnx --configuration Release -- --coverage --coverage-output-format cobertura --coverage-output "$PWD/TestResults/TUnitCoverage/coverage.cobertura.xml" --coverage-settings "$PWD/CodeCoverage.runsettings")`
28+
- test: `(cd ../.. && dotnet test --solution MarkdownLd.Kb.slnx --configuration Release)`
29+
- coverage: `(cd ../.. && dotnet test --solution MarkdownLd.Kb.slnx --configuration Release -- --coverage --coverage-output-format cobertura --coverage-output "$PWD/TestResults/TUnitCoverage/coverage.cobertura.xml" --coverage-settings "$PWD/CodeCoverage.runsettings")`
3030

3131
## Local Risks
3232

src/MarkdownLd.Kb/Extraction/MarkdownFrontMatterParser.cs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,8 @@ private static bool TryParseFrontMatter(string frontMatterBlock, out MarkdownFro
4343
.IgnoreUnmatchedProperties()
4444
.Build();
4545

46-
var values = deserializer.Deserialize<Dictionary<string, object?>>(new StringReader(frontMatterBlock));
47-
if (values is null)
48-
{
49-
values = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase);
50-
}
46+
var values = deserializer.Deserialize<Dictionary<string, object?>>(new StringReader(frontMatterBlock))
47+
?? new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase);
5148

5249
frontMatter = new MarkdownFrontMatter
5350
{

src/MarkdownLd.Kb/Parsing/MarkdownFrontMatterParser.cs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,8 @@ private static bool TryParseFrontMatter(string rawYaml, out MarkdownFrontMatter
3838
{
3939
try
4040
{
41-
var values = Deserializer.Deserialize<Dictionary<object, object?>>(rawYaml);
42-
if (values is null)
43-
{
44-
values = new Dictionary<object, object?>();
45-
}
41+
var values = Deserializer.Deserialize<Dictionary<object, object?>>(rawYaml)
42+
?? new Dictionary<object, object?>();
4643

4744
var normalized = NormalizeDictionary(values);
4845

src/MarkdownLd.Kb/Pipeline/KnowledgeFactMerger.cs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,16 @@ public sealed class KnowledgeFactMerger(Uri? baseUri = null)
66
{
77
private readonly Uri _baseUri = KnowledgeNaming.NormalizeBaseUri(baseUri ?? new Uri(DefaultBaseUriText, UriKind.Absolute));
88

9-
public KnowledgeExtractionResult Merge(params KnowledgeExtractionResult?[]? results)
9+
public KnowledgeExtractionResult Merge(params KnowledgeExtractionResult[] results)
1010
{
11+
ArgumentNullException.ThrowIfNull(results);
12+
1113
var entities = new Dictionary<string, KnowledgeEntityFact>(StringComparer.OrdinalIgnoreCase);
1214
var assertions = new Dictionary<string, KnowledgeAssertionFact>(StringComparer.OrdinalIgnoreCase);
1315

14-
if (results is null)
15-
{
16-
return new KnowledgeExtractionResult();
17-
}
18-
1916
foreach (var result in results)
2017
{
21-
if (result is null)
22-
{
23-
continue;
24-
}
18+
ArgumentNullException.ThrowIfNull(result);
2519

2620
foreach (var entity in result.Entities)
2721
{

0 commit comments

Comments
 (0)