Skip to content

Commit 3c5aeb2

Browse files
committed
Add MCP server support with documentation search infrastructure
Port MCP server from labs into the production docs site, adding:
- Docs.Mcp: shared library with SQLite FTS5 DbContext, entity models, and tool stubs
- Docs.Indexer: CLI tool to build search index from docs, blog RSS, and GitHub samples
- Docs.Web: conditional MCP endpoint at /mcp (enabled when mcp.db exists)
1 parent b4e026a commit 3c5aeb2

19 files changed

Lines changed: 913 additions & 3 deletions

server/Docs.slnx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
<Project Path="src/Docs.ServiceDefaults/Docs.ServiceDefaults.csproj"/>
44
<Project Path="src/Docs.Web/Docs.Web.csproj"/>
55
<Project Path="src/Docs.AppHost/Docs.AppHost.csproj"/>
6+
<Project Path="src/Docs.Mcp/Docs.Mcp.csproj"/>
7+
<Project Path="src/Docs.Indexer/Docs.Indexer.csproj"/>
68
</Folder>
79
<Folder Name="/tests/">
810
<Project Path="tests/Docs.Web.Tests/Docs.Web.Tests.csproj"/>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
<!-- NOTE(review): presumably the Docs.Indexer project file (it references ..\Docs.Mcp and builds an executable); confirm against the file path in the commit. -->
2+
3+
<!-- Build settings: executable output targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net10.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
</PropertyGroup>
9+
10+
<!-- NuGet packages. BlogIndexer imports HtmlAgilityPack, ReverseMarkdown and SimpleFeedReader; DocsIndexer imports Markdig. -->
<ItemGroup>
11+
<PackageReference Include="HtmlAgilityPack" Version="1.12.1" />
12+
<PackageReference Include="Markdig" Version="0.41.0" />
13+
<PackageReference Include="ReverseMarkdown" Version="4.6.0" />
14+
<PackageReference Include="SimpleFeedReader" Version="2.0.0" />
15+
</ItemGroup>
16+
17+
<!-- Project reference to Docs.Mcp; the indexers import Docs.Mcp.Database (McpDb) from it. -->
<ItemGroup>
18+
<ProjectReference Include="..\Docs.Mcp\Docs.Mcp.csproj" />
19+
</ItemGroup>
20+
21+
</Project>
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
// Copyright (c) Duende Software. All rights reserved.
2+
// See LICENSE in the project root for license information.
3+
4+
using Docs.Mcp.Database;
5+
using HtmlAgilityPack;
6+
using Microsoft.EntityFrameworkCore;
7+
using ReverseMarkdown;
8+
using SimpleFeedReader;
9+
10+
namespace Docs.Indexer.Indexers;
11+
12+
/// <summary>
/// Indexes blog articles from the Duende Software RSS feed into the
/// <c>FTSBlogArticle</c> table.
/// </summary>
/// <param name="db">Database context used to execute the insert statements.</param>
/// <param name="httpClient">HTTP client used to download the HTML of each blog post.</param>
public sealed class BlogIndexer(McpDb db, HttpClient httpClient)
{
    private const string RssFeedUrl = "https://duendesoftware.com/rss.xml";

    // Feed items published before this date are ignored.
    private static readonly DateTime ReferenceDate = new(2024, 10, 01);

    /// <summary>
    /// Fetch and index blog articles from the RSS feed.
    /// </summary>
    /// <remarks>
    /// Only feed items in the <c>blog</c> category published on or after the reference date
    /// are considered. Items without a URI, or whose page has no recognizable content section,
    /// are skipped and are not included in the reported count. Any other failure is logged
    /// and rethrown, which aborts the run.
    /// </remarks>
    /// <returns>A task that completes when all matching posts have been processed.</returns>
    public async Task IndexAsync()
    {
        Console.WriteLine($"Fetching RSS feed: {RssFeedUrl}");

        var reader = new FeedReader();
        var items = await reader.RetrieveFeedAsync(RssFeedUrl);

        // Filter to blog posts since the reference date
        var blogItems = items
            .Where(it => it.PublishDate >= ReferenceDate && it.Categories?.Contains("blog") == true)
            .ToList();

        Console.WriteLine($"Found {blogItems.Count} blog posts since {ReferenceDate:yyyy-MM-dd}");

        var indexedCount = 0;
        foreach (var item in blogItems)
        {
            if (item.Uri is null)
            {
                continue;
            }

            try
            {
                // Count and report a post only when a row was actually written; a page
                // without a content section has already produced its own warning.
                if (await IndexBlogPostAsync(item.Title ?? "Untitled", item.GetSummary(), item.Uri))
                {
                    indexedCount++;
                    Console.WriteLine($" Indexed: {item.Title}");
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($" Error indexing {item.Title}: {ex.Message}");
                throw;
            }
        }

        await db.SaveChangesAsync();
        Console.WriteLine($"Indexed {indexedCount} blog articles");
    }

    /// <summary>
    /// Downloads a single blog post, converts its main content to Markdown and inserts it
    /// into <c>FTSBlogArticle</c>.
    /// </summary>
    /// <param name="title">Title stored with the article.</param>
    /// <param name="description">Optional feed summary; when present it is prepended to the content.</param>
    /// <param name="url">Address of the blog post page.</param>
    /// <returns>
    /// <c>true</c> when a row was inserted; <c>false</c> when no content section could be
    /// located in the page and nothing was written.
    /// </returns>
    private async Task<bool> IndexBlogPostAsync(string title, string? description, Uri url)
    {
        // Fetch the HTML content
        var htmlContent = await httpClient.GetStringAsync(url);

        // Parse HTML and find content section
        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(htmlContent);

        // Try the blog-specific section first, then fall back to generic containers
        var content = htmlDocument.DocumentNode.SelectSingleNode("//section[@class='page-content alt markdown']")
            ?? htmlDocument.DocumentNode.SelectSingleNode("//article")
            ?? htmlDocument.DocumentNode.SelectSingleNode("//main");

        if (content is null)
        {
            Console.WriteLine($" Warning: Could not find content section for {title}");
            return false;
        }

        // Convert HTML to Markdown
        var converter = new Converter(new Config
        {
            GithubFlavored = true,
            RemoveComments = true
        });

        var markdownContent = converter.Convert(content.InnerHtml);

        // Combine description with content if available
        var fullContent = !string.IsNullOrEmpty(description)
            ? $"Summary: {description}\n\n---\n\n{markdownContent}"
            : markdownContent;

        // The {n} placeholders are passed as arguments to ExecuteSqlRawAsync rather than
        // being concatenated into the SQL text.
        await db.Database.ExecuteSqlRawAsync(
            "INSERT INTO FTSBlogArticle (Id, Title, Content) VALUES ({0}, {1}, {2})",
            Guid.NewGuid().ToString(),
            title,
            fullContent);

        return true;
    }
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// Copyright (c) Duende Software. All rights reserved.
2+
// See LICENSE in the project root for license information.
3+
4+
using System.Text;
5+
using System.Text.RegularExpressions;
6+
using Docs.Mcp.Database;
7+
using Markdig.Syntax;
8+
using Markdig.Syntax.Inlines;
9+
using Microsoft.EntityFrameworkCore;
10+
11+
namespace Docs.Indexer.Indexers;
12+
13+
/// <summary>
/// Indexes documentation articles from local llms.txt files into the
/// <c>FTSDocsArticle</c> table.
/// </summary>
/// <param name="db">Database context used to execute the insert statements.</param>
public sealed partial class DocsIndexer(McpDb db)
{
    // Text that separates one article from the next inside an _llms-txt file.
    private const string SectionDelimiter = "-----";

    private const string SystemOpenTag = "<SYSTEM>";
    private const string SystemCloseTag = "</SYSTEM>";

    /// <summary>
    /// Index documentation from the wwwroot directory.
    /// Reads llms.txt and parses linked _llms-txt/*.txt files.
    /// </summary>
    /// <param name="wwwrootPath">Directory containing <c>llms.txt</c> and the <c>_llms-txt</c> folder.</param>
    /// <returns>A task that completes when every linked file has been processed.</returns>
    /// <exception cref="InvalidOperationException">
    /// <c>llms.txt</c> or the <c>_llms-txt</c> directory does not exist under <paramref name="wwwrootPath"/>.
    /// </exception>
    public async Task IndexAsync(string wwwrootPath)
    {
        var llmsTxtPath = Path.Combine(wwwrootPath, "llms.txt");
        if (!File.Exists(llmsTxtPath))
        {
            throw new InvalidOperationException($"llms.txt not found at: {llmsTxtPath}");
        }

        Console.WriteLine($"Reading: {llmsTxtPath}");
        var llmsTxt = await File.ReadAllTextAsync(llmsTxtPath);
        var llmsMd = Markdig.Markdown.Parse(llmsTxt);

        var llmsTxtDir = Path.Combine(wwwrootPath, "_llms-txt");
        if (!Directory.Exists(llmsTxtDir))
        {
            throw new InvalidOperationException($"_llms-txt directory not found at: {llmsTxtDir}");
        }

        var totalArticles = 0;

        // Find links to _llms-txt files
        foreach (var link in llmsMd.Descendants<LinkInline>())
        {
            if (link.Url?.Contains("_llms-txt/", StringComparison.OrdinalIgnoreCase) != true)
            {
                continue;
            }

            // Only the file name is taken from the link; the file itself is read from disk.
            var fileName = ExtractFileName(link.Url);
            if (string.IsNullOrEmpty(fileName))
            {
                continue;
            }

            var filePath = Path.Combine(llmsTxtDir, fileName);
            if (!File.Exists(filePath))
            {
                Console.WriteLine($" Warning: File not found: {filePath}");
                continue;
            }

            totalArticles += await IndexDocumentFileAsync(filePath);
        }

        await db.SaveChangesAsync();
        Console.WriteLine($"Indexed {totalArticles} documentation articles");
    }

    /// <summary>
    /// Splits one _llms-txt file into articles and inserts each into <c>FTSDocsArticle</c>.
    /// </summary>
    /// <param name="filePath">Full path of the file to read.</param>
    /// <returns>The number of articles inserted from this file.</returns>
    private async Task<int> IndexDocumentFileAsync(string filePath)
    {
        Console.WriteLine($" Processing: {Path.GetFileName(filePath)}");
        var content = await File.ReadAllTextAsync(filePath);

        // NOTE(review): the delimiter is matched anywhere in the text, so an article that
        // itself contains five or more consecutive dashes (for example a Markdown table
        // separator row) is split at that point. Confirm the generated files never do.
        var sections = content.Split(SectionDelimiter, StringSplitOptions.RemoveEmptyEntries);
        var articlesCount = 0;
        string? productName = null;

        foreach (var section in sections)
        {
            var trimmedSection = StripLeadingSystemTag(section.Trim());
            if (string.IsNullOrEmpty(trimmedSection))
            {
                continue;
            }

            // Sections without a usable H1 are not articles and are skipped.
            var article = SplitArticle(trimmedSection);
            if (article is null || string.IsNullOrEmpty(article.Value.Title))
            {
                continue;
            }

            var (title, articleContent) = article.Value;

            // First H1 in the file becomes the product name
            productName ??= title;

            // The {n} placeholders are passed as arguments to ExecuteSqlRawAsync rather
            // than being concatenated into the SQL text.
            await db.Database.ExecuteSqlRawAsync(
                "INSERT INTO FTSDocsArticle (Id, Product, Title, Content) VALUES ({0}, {1}, {2}, {3})",
                Guid.NewGuid().ToString(),
                productName,
                title,
                articleContent);

            articlesCount++;
        }

        return articlesCount;
    }

    /// <summary>
    /// Removes a leading <c>&lt;SYSTEM&gt;…&lt;/SYSTEM&gt;</c> block from a section.
    /// </summary>
    /// <param name="section">Section text, already trimmed.</param>
    /// <returns>
    /// The trimmed text following the closing tag, or <paramref name="section"/> unchanged
    /// when it does not start with the opening tag or has no closing tag.
    /// </returns>
    private static string StripLeadingSystemTag(string section)
    {
        if (!section.StartsWith(SystemOpenTag, StringComparison.OrdinalIgnoreCase))
        {
            return section;
        }

        var endTag = section.IndexOf(SystemCloseTag, StringComparison.OrdinalIgnoreCase);
        return endTag > 0
            ? section[(endTag + SystemCloseTag.Length)..].Trim()
            : section;
    }

    /// <summary>
    /// Extracts the file name from an _llms-txt link.
    /// </summary>
    /// <param name="url">Link target taken from llms.txt.</param>
    /// <returns>The trailing <c>*.txt</c> file name, or <c>null</c> when the URL does not match.</returns>
    private static string? ExtractFileName(string url)
    {
        // URL format: https://docs.duendesoftware.com/_llms-txt/access-token-management.txt
        var match = LlmsTxtFileNameRegex().Match(url);
        return match.Success ? match.Groups[1].Value : null;
    }

    /// <summary>
    /// Locates the first H1 line of a section and returns its text together with the
    /// content that follows that line.
    /// </summary>
    /// <remarks>
    /// Title and content come from the same scan, so the content always starts after the
    /// H1 line that supplied the title, even when other heading lines precede it.
    /// </remarks>
    /// <param name="markdown">Section text, already trimmed.</param>
    /// <returns>
    /// The title and content, or <c>null</c> when non-heading text is reached before any H1.
    /// When nothing follows the H1 line, the whole section is returned as the content.
    /// </returns>
    private static (string Title, string Content)? SplitArticle(string markdown)
    {
        using var reader = new StringReader(markdown);
        while (reader.ReadLine() is { } line)
        {
            var trimmed = line.TrimStart();

            // A line starting with "# " (but not "##") is the H1
            if (trimmed.StartsWith("# ", StringComparison.Ordinal))
            {
                // ReadLine consumed the heading line, so the rest of the reader is the body.
                var body = reader.ReadToEnd().Trim();
                return (trimmed[2..].Trim(), body.Length > 0 ? body : markdown);
            }

            // Skip empty lines and other headings, but stop at non-heading content
            if (!string.IsNullOrWhiteSpace(trimmed) && !trimmed.StartsWith('#'))
            {
                break;
            }
        }

        return null;
    }

    [GeneratedRegex(@"_llms-txt/([^/]+\.txt)$", RegexOptions.IgnoreCase)]
    private static partial Regex LlmsTxtFileNameRegex();
}

0 commit comments

Comments
 (0)