Skip to content

Commit 53fb9bf

Browse files
committed
code
1 parent daa23d5 commit 53fb9bf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+2231
-1159
lines changed

AGENTS.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ Target capabilities:
4848
## Durable Project Rules
4949

5050
- Keep the core Markdown-to-graph pipeline deterministic and testable without network access.
51+
- Keep the core runtime in-memory. Do not introduce localhost, HTTP server, background service, database server, or hosted API dependencies into the production library.
5152
- Treat LLM/entity extraction as an adapter behind a small interface and implement that adapter through `Microsoft.Extensions.AI.IChatClient` from the start.
53+
- Do not add an embedding dependency to the core graph pipeline. If vector/semantic indexing is added later, expose it as an optional adapter boundary through `Microsoft.Extensions.AI.IEmbeddingGenerator<,>` or a similarly small port, with the concrete provider owned by the host app.
5254
- It is allowed for the production library to reference `Microsoft.Extensions.AI.Abstractions`; concrete OpenAI/Azure/Foundry providers must remain app-level dependencies unless an ADR says otherwise.
5355
- The product/library name is `Markdown-LD Knowledge Bank`; do not rename it to a shorter marketing name.
5456
- The C# root namespace, assembly identity, and package ID MUST be `ManagedCode.MarkdownLd.Kb`.
@@ -85,7 +87,7 @@ List only the skills this solution actually uses.
8587
- `build`: `dotnet build MarkdownLd.Kb.slnx --no-restore`
8688
- `test`: `dotnet test MarkdownLd.Kb.slnx --no-build`
8789
- `format`: `dotnet format MarkdownLd.Kb.slnx --verify-no-changes`
88-
- `coverage`: `dotnet test MarkdownLd.Kb.slnx --collect:"XPlat Code Coverage"`
90+
- `coverage`: `dotnet test --solution MarkdownLd.Kb.slnx --no-build --coverlet --coverlet-output-format cobertura --coverlet-include '[ManagedCode.MarkdownLd.Kb]*' --results-directory TestResults/CoverletMtpFiltered`
8991

9092
`.NET` runner policy:
9193

@@ -94,7 +96,7 @@ List only the skills this solution actually uses.
9496
- Test framework: TUnit.
9597
- Assertion library: Shouldly.
9698
- Test runner model: TUnit through `dotnet test` / Microsoft.Testing.Platform-compatible execution.
97-
- Coverage: Coverlet XPlat collector unless a later ADR moves coverage to `coverlet.MTP` or another .NET 10-compatible collector.
99+
- Coverage: `coverlet.MTP` through `dotnet test --coverlet` with the production assembly include filter.
98100

99101
### Project AGENTS Policy
100102

docs/ADR/ADR-0001-rdf-sparql-library.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ Related Features: `docs/Architecture.md`
1010

1111
- [x] Analyze upstream graph stack and .NET options.
1212
- [x] Choose the RDF/SPARQL dependency.
13-
- [ ] Add dotNetRDF to the production project.
14-
- [ ] Add flow tests that query generated graphs through SPARQL.
15-
- [ ] Run build, test, format, and coverage commands.
16-
- [ ] Update `docs/Architecture.md` if dependency boundaries change.
13+
- [x] Add dotNetRDF to the production project.
14+
- [x] Add flow tests that query generated graphs through SPARQL.
15+
- [x] Run build, test, format, and coverage commands.
16+
- [x] Update `docs/Architecture.md` if dependency boundaries change.
1717

1818
## Context
1919

@@ -46,6 +46,7 @@ Use dotNetRDF as the RDF graph, serialization, and SPARQL engine for the first .
4646
Key points:
4747

4848
- dotNetRDF replaces Python RDFLib for the C# port.
49+
- dotNetRDF supports RDF and SPARQL in .NET; its user guide documents in-memory RDF data and in-memory SPARQL querying, which matches the no-server core runtime boundary.
4950
- Markdig and YamlDotNet will handle Markdown/front matter parsing separately.
5051
- AI extraction remains behind an extraction port that uses `Microsoft.Extensions.AI.IChatClient`; provider/orchestration packages are not part of this RDF dependency decision.
5152

@@ -98,7 +99,7 @@ flowchart LR
9899

99100
Mitigations:
100101

101-
- Hide dependency details behind `KnowledgeGraph`, `KnowledgeQueryService`, and serialization methods where practical.
102+
- Hide dependency details behind `KnowledgeGraph` query methods, `KnowledgeSearchService`, and serialization methods where practical.
102103
- Add tests for serialization and SPARQL query paths.
103104
- Keep remote/federated SPARQL out of the first slice.
104105

@@ -165,7 +166,7 @@ Mitigations:
165166
- build: `dotnet build MarkdownLd.Kb.slnx --no-restore`
166167
- test: `dotnet test MarkdownLd.Kb.slnx --no-build`
167168
- format: `dotnet format MarkdownLd.Kb.slnx --verify-no-changes`
168-
- coverage: `dotnet test MarkdownLd.Kb.slnx --collect:"XPlat Code Coverage"`
169+
- coverage: `dotnet test --solution MarkdownLd.Kb.slnx --no-build --coverlet --coverlet-output-format cobertura --coverlet-include '[ManagedCode.MarkdownLd.Kb]*' --results-directory TestResults/CoverletMtpFiltered`
169170

170171
### New or changed tests
171172

@@ -187,4 +188,5 @@ No migration exists yet. This is the initial implementation decision.
187188
- `external/lqdev-markdown-ld-kb/tools/postprocess.py`
188189
- `external/lqdev-markdown-ld-kb/api/function_app.py`
189190
- `external/lqdev-markdown-ld-kb/.ai-memex/blog-post-zero-cost-knowledge-graph-from-markdown.md`
190-
- `https://github.com/dotnetrdf/dotnetrdf`
191+
- dotNetRDF upstream repository: `https://github.com/dotnetrdf/dotnetrdf`
192+
- dotNetRDF user guide: `https://dotnetrdf.org/docs/stable/user_guide/index.html`

docs/ADR/ADR-0002-llm-extraction-ichatclient.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ Mitigations:
114114
- build: `dotnet build MarkdownLd.Kb.slnx --no-restore`
115115
- test: `dotnet test MarkdownLd.Kb.slnx --no-build`
116116
- format: `dotnet format MarkdownLd.Kb.slnx --verify-no-changes`
117-
- coverage: `dotnet test MarkdownLd.Kb.slnx --collect:"XPlat Code Coverage"`
117+
- coverage: `dotnet test --solution MarkdownLd.Kb.slnx --no-build --coverlet --coverlet-output-format cobertura --coverlet-include '[ManagedCode.MarkdownLd.Kb]*' --results-directory TestResults/CoverletMtpFiltered`
118118

119119
## References
120120

docs/Architecture.md

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,26 @@ Date: 2026-04-11
44

55
## Purpose
66

7-
Markdown-LD Knowledge Bank is a .NET 10 library for converting human-authored Markdown knowledge-base files into an RDF knowledge graph and querying that graph through SPARQL or higher-level search APIs.
7+
Markdown-LD Knowledge Bank is a .NET 10 library for converting human-authored Markdown knowledge-base files into an in-memory RDF knowledge graph and querying that graph through SPARQL or higher-level search APIs.
88

99
The upstream reference repository is kept as a read-only submodule at `external/lqdev-markdown-ld-kb`. This C# implementation ports the technology, not the Python file layout.
1010

11+
The core runtime has no localhost, HTTP server, background service, database server, or hosted API dependency. Callers pass files, directories, or in-memory document content into the library, and the library returns in-memory graph/search/query results.
12+
13+
The first-slice graph/search model does not require embeddings. The only AI boundary in the core pipeline is `Microsoft.Extensions.AI.IChatClient` for optional entity/assertion extraction. If semantic vector search is added later, it should be a separate optional adapter over `Microsoft.Extensions.AI.IEmbeddingGenerator<,>` or an equivalent small port, with the concrete provider owned by the host app.
14+
1115
## System Boundaries
1216

1317
```mermaid
1418
flowchart LR
1519
Author["Markdown author"] --> MarkdownFiles["Markdown files"]
16-
MarkdownFiles --> Loader["Document loader"]
20+
MarkdownFiles --> Loader["In-memory document converter and loader"]
1721
Loader --> Parser["Markdown parser and chunker"]
1822
Parser --> Extractor["Fact extractor port"]
1923
Extractor --> Builder["RDF graph builder"]
20-
Builder --> Graph["Knowledge graph"]
21-
Graph --> Sparql["SPARQL query service"]
22-
Graph --> Search["Graph search service"]
24+
Builder --> Graph["In-memory knowledge graph"]
25+
Graph --> Sparql["In-memory SPARQL executor API"]
26+
Graph --> Search["In-memory graph search API"]
2327
Graph --> Serializers["Turtle and JSON-LD serializers"]
2428
IChatClient["Microsoft.Extensions.AI IChatClient"] --> Extractor
2529
AgentFramework["Future Microsoft Agent Framework orchestration"] -. "wraps IChatClient" .-> IChatClient
@@ -35,7 +39,7 @@ sequenceDiagram
3539
participant Extractor as IKnowledgeFactExtractor
3640
participant Chat as IChatClient
3741
participant Graph as KnowledgeGraphBuilder
38-
participant Query as KnowledgeQueryService
42+
participant Query as InMemorySparqlExecutor
3943
4044
Caller->>Pipeline: BuildAsync(documents, options)
4145
Pipeline->>Parser: Parse Markdown and front matter
@@ -45,7 +49,7 @@ sequenceDiagram
4549
Chat-->>Extractor: Knowledge extraction result
4650
Extractor-->>Pipeline: Article, entities, assertions
4751
Pipeline->>Graph: Add facts as RDF triples
48-
Graph-->>Pipeline: KnowledgeGraph
52+
Graph-->>Pipeline: In-memory KnowledgeGraph
4953
Caller->>Query: ExecuteSelect(graph, sparql)
5054
Query-->>Caller: SPARQL bindings
5155
```
@@ -82,7 +86,7 @@ flowchart TB
8286
| `tools/chunker.py` | `MarkdownDocumentParser` | YAML front matter, stable document ID, heading sections, stable chunk IDs |
8387
| `tools/postprocess.py` | `DeterministicKnowledgeFactExtractor`, RDF builders | slug IDs, entity canonicalization, assertion de-duplication, schema.org/kb/prov vocabulary |
8488
| `tools/kg_build.py` | `MarkdownKnowledgePipeline` | orchestrates parse -> extract -> graph build -> query-ready graph |
85-
| `api/function_app.py` | `KnowledgeQueryService` | SELECT/ASK safety, SPARQL execution, JSON result shape at library level |
89+
| `api/function_app.py` | `KnowledgeGraph` query methods and `KnowledgeSearchService` | SELECT/ASK safety, in-memory SPARQL execution, JSON result shape at library level without a hosted function/server |
8690
| `tools/llm_client.py` | `ChatClientKnowledgeFactExtractor` | structured LLM extraction through `Microsoft.Extensions.AI.IChatClient` |
8791
| `api/nl_to_sparql.py` | future query adapter | schema-injected NL-to-SPARQL through `IChatClient`; Microsoft Agent Framework may orchestrate this later |
8892
| `ontology/*.ttl`, `ontology/context.jsonld` | `KnowledgeGraphNamespaces` | schema.org, kb, prov, rdf, xsd namespaces |
@@ -92,6 +96,7 @@ flowchart TB
9296
- Parsing depends on Markdig and YamlDotNet.
9397
- RDF graph building and SPARQL execution depend on dotNetRDF.
9498
- LLM extraction depends on `Microsoft.Extensions.AI.Abstractions` and accepts `IChatClient`.
99+
- Embeddings are not required for the core graph build/query flow.
95100
- Public API should prefer repository types over raw dependency types when feasible.
96101
- AI adapters depend on the core extraction port. The core library must not depend on concrete provider packages or agent orchestration packages in the first slice.
97102

@@ -117,5 +122,7 @@ Coverage requirement: 95%+ line coverage for changed production code.
117122
- Upstream reference repository: `external/lqdev-markdown-ld-kb`
118123
- Blog pattern: `external/lqdev-markdown-ld-kb/.ai-memex/blog-post-zero-cost-knowledge-graph-from-markdown.md`
119124
- NL-to-SPARQL pattern: `external/lqdev-markdown-ld-kb/.ai-memex/pattern-nl-to-sparql-schema-injected-few-shot.md`
125+
- dotNetRDF upstream repository: `https://github.com/dotnetrdf/dotnetrdf`
126+
- dotNetRDF user guide: `https://dotnetrdf.org/docs/stable/user_guide/index.html`
120127
- RDF/SPARQL dependency decision: `docs/ADR/ADR-0001-rdf-sparql-library.md`
121128
- LLM extraction dependency decision: `docs/ADR/ADR-0002-llm-extraction-ichatclient.md`

src/MarkdownLd.Kb/AGENTS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ Purpose: Production library for Markdown-LD Knowledge Bank.
1616
- The production library may reference `Microsoft.Extensions.AI` abstractions, Markdig, YamlDotNet, and dotNetRDF.
1717
- Do not add provider-specific LLM SDKs here.
1818
- Do not add Microsoft Agent Framework packages here without a new ADR.
19+
- Do not add localhost, HTTP server, background service, database server, or hosted API dependencies. The production pipeline must build, store, query, and serialize graphs in memory.
20+
- Do not add embedding/vector provider dependencies to the core pipeline. Future semantic/vector search must be optional and provider-neutral.
1921
- Keep the root namespace, assembly name, and package ID as `ManagedCode.MarkdownLd.Kb`.
2022

2123
## Commands
2224

2325
- build: `dotnet build ../../MarkdownLd.Kb.slnx --no-restore`
2426
- test: `dotnet test ../../MarkdownLd.Kb.slnx --no-build`
25-
- coverage: `dotnet test ../../MarkdownLd.Kb.slnx --collect:"XPlat Code Coverage"`
27+
- coverage: `dotnet test --solution ../../MarkdownLd.Kb.slnx --no-build --coverlet --coverlet-output-format cobertura --coverlet-include '[ManagedCode.MarkdownLd.Kb]*' --results-directory ../../TestResults/CoverletMtpFiltered`
2628

2729
## Local Risks
2830

src/MarkdownLd.Kb/Ai/ChatClientKnowledgeFactExtractor.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public async Task<KnowledgeFactExtractionResult> ExtractAsync(
5858
request.ChunkId,
5959
[],
6060
[],
61-
string.Empty);
61+
EmptyString);
6262
}
6363

6464
var messages = new[]
@@ -81,7 +81,7 @@ public async Task<KnowledgeFactExtractionResult> ExtractAsync(
8181
request.ChunkId,
8282
[],
8383
[],
84-
response.Text ?? string.Empty);
84+
response.Text ?? EmptyString);
8585
}
8686

8787
var entities = NormalizeEntities(envelope.Entities);
@@ -92,7 +92,7 @@ public async Task<KnowledgeFactExtractionResult> ExtractAsync(
9292
request.ChunkId,
9393
entities,
9494
assertions,
95-
response.Text ?? string.Empty);
95+
response.Text ?? EmptyString);
9696
}
9797

9898
private ChatOptions BuildChatOptions()
@@ -128,7 +128,7 @@ private static string BuildUserPrompt(KnowledgeFactExtractionRequest request)
128128
.Append(FrontMatterItemPrefix)
129129
.Append(pair.Key)
130130
.Append(KeyValueSeparator)
131-
.AppendLine(pair.Value ?? string.Empty);
131+
.AppendLine(pair.Value ?? EmptyString);
132132
}
133133
}
134134

src/MarkdownLd.Kb/Ai/KnowledgeFactConstants.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ internal static class KnowledgeFactConstants
1010
internal const string SchemaCreativeWork = "schema:CreativeWork";
1111
internal const string SchemaArticle = "schema:Article";
1212
internal const string SchemaThing = "schema:Thing";
13+
internal const string EmptyString = "";
1314

1415
internal const string ArticleIdLabel = "ARTICLE_ID: ";
1516
internal const string ChunkIdLabel = "CHUNK_ID: ";

src/MarkdownLd.Kb/Extraction/MarkdownFrontMatterParser.cs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ public static MarkdownFrontMatterParseResult Parse(string markdown)
1313
{
1414
if (string.IsNullOrWhiteSpace(markdown))
1515
{
16-
return new MarkdownFrontMatterParseResult(new MarkdownFrontMatter(), string.Empty, false);
16+
return new MarkdownFrontMatterParseResult(new MarkdownFrontMatter(), Empty, false);
1717
}
1818

1919
var normalized = markdown.TrimStart('\uFEFF').Replace(CarriageReturnLineFeed, LineFeed);
@@ -78,7 +78,7 @@ private static (string FrontMatter, string Body, bool HasFrontMatter) SplitFront
7878
var firstLine = reader.ReadLine();
7979
if (!string.Equals(firstLine?.Trim(), FrontMatterFence, StringComparison.Ordinal))
8080
{
81-
return (string.Empty, markdown, false);
81+
return (Empty, markdown, false);
8282
}
8383

8484
var frontMatter = new StringBuilder();
@@ -94,7 +94,7 @@ private static (string FrontMatter, string Body, bool HasFrontMatter) SplitFront
9494
frontMatter.AppendLine(line);
9595
}
9696

97-
return (string.Empty, markdown, false);
97+
return (Empty, markdown, false);
9898
}
9999

100100
private static string? ReadString(IReadOnlyDictionary<string, object?> values, string key)
@@ -164,19 +164,19 @@ private static IReadOnlyList<MarkdownAuthor> ReadAuthors(IReadOnlyDictionary<str
164164
{
165165
return new MarkdownAuthor
166166
{
167-
Name = ReadString(map, NameKey) ?? ReadString(map, LabelKey) ?? string.Empty,
167+
Name = ReadString(map, NameKey) ?? ReadString(map, LabelKey) ?? Empty,
168168
SameAs = ReadString(map, SameAsKey) ?? ReadString(map, SameAsSnakeKey),
169169
Type = ReadString(map, TypeKey),
170170
};
171171
}
172172

173173
if (item is IDictionary<object, object?> dynamicMap)
174174
{
175-
var dictionary = dynamicMap.ToDictionary(entry => entry.Key.ToString() ?? string.Empty, entry => entry.Value, StringComparer.OrdinalIgnoreCase);
175+
var dictionary = dynamicMap.ToDictionary(entry => entry.Key.ToString() ?? Empty, entry => entry.Value, StringComparer.OrdinalIgnoreCase);
176176
return ReadAuthor(dictionary);
177177
}
178178

179-
return new MarkdownAuthor { Name = item.ToString()?.Trim() ?? string.Empty };
179+
return new MarkdownAuthor { Name = item.ToString()?.Trim() ?? Empty };
180180
}
181181

182182
private static IReadOnlyList<MarkdownTopic> ReadTopics(IReadOnlyDictionary<string, object?> values, string key)
@@ -210,18 +210,18 @@ private static IReadOnlyList<MarkdownTopic> ReadTopics(IReadOnlyDictionary<strin
210210
{
211211
return new MarkdownTopic
212212
{
213-
Label = ReadString(map, LabelKey) ?? ReadString(map, NameKey) ?? ReadString(map, ValueKey) ?? string.Empty,
213+
Label = ReadString(map, LabelKey) ?? ReadString(map, NameKey) ?? ReadString(map, ValueKey) ?? Empty,
214214
SameAs = ReadString(map, SameAsKey) ?? ReadString(map, SameAsSnakeKey),
215215
};
216216
}
217217

218218
if (item is IDictionary<object, object?> dynamicMap)
219219
{
220-
var dictionary = dynamicMap.ToDictionary(entry => entry.Key.ToString() ?? string.Empty, entry => entry.Value, StringComparer.OrdinalIgnoreCase);
220+
var dictionary = dynamicMap.ToDictionary(entry => entry.Key.ToString() ?? Empty, entry => entry.Value, StringComparer.OrdinalIgnoreCase);
221221
return ReadTopic(dictionary);
222222
}
223223

224-
return new MarkdownTopic { Label = NormalizeLabel(item.ToString() ?? string.Empty) };
224+
return new MarkdownTopic { Label = NormalizeLabel(item.ToString() ?? Empty) };
225225
}
226226

227227
private static IReadOnlyList<MarkdownEntityHint> ReadEntityHints(IReadOnlyDictionary<string, object?> values)
@@ -255,19 +255,19 @@ private static IReadOnlyList<MarkdownEntityHint> ReadEntityHints(IReadOnlyDictio
255255
{
256256
return new MarkdownEntityHint
257257
{
258-
Label = ReadString(map, LabelKey) ?? ReadString(map, NameKey) ?? string.Empty,
258+
Label = ReadString(map, LabelKey) ?? ReadString(map, NameKey) ?? Empty,
259259
SameAs = ReadString(map, SameAsKey) ?? ReadString(map, SameAsSnakeKey),
260260
Type = ReadString(map, TypeKey),
261261
};
262262
}
263263

264264
if (item is IDictionary<object, object?> dynamicMap)
265265
{
266-
var dictionary = dynamicMap.ToDictionary(entry => entry.Key.ToString() ?? string.Empty, entry => entry.Value, StringComparer.OrdinalIgnoreCase);
266+
var dictionary = dynamicMap.ToDictionary(entry => entry.Key.ToString() ?? Empty, entry => entry.Value, StringComparer.OrdinalIgnoreCase);
267267
return ReadEntityHint(dictionary);
268268
}
269269

270-
return new MarkdownEntityHint { Label = NormalizeLabel(item.ToString() ?? string.Empty) };
270+
return new MarkdownEntityHint { Label = NormalizeLabel(item.ToString() ?? Empty) };
271271
}
272272

273273
private static string NormalizeLabel(string value)

src/MarkdownLd.Kb/Extraction/MarkdownKnowledgeCanonicalizer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ private static string ResolveEntityId(string value, IReadOnlyDictionary<string,
163163
return entityId;
164164
}
165165

166-
if (Uri.TryCreate(value, UriKind.Absolute, out var uri) || value.StartsWith(UrnSchemePrefix, StringComparison.OrdinalIgnoreCase))
166+
if (Uri.TryCreate(value, UriKind.Absolute, out _) || value.StartsWith(UrnSchemePrefix, StringComparison.OrdinalIgnoreCase))
167167
{
168168
return value;
169169
}

0 commit comments

Comments
 (0)