DuendeSoftware
diff --git a/‎server/Docs.slnx‎
Lines changed: 2 additions & 0 deletions b/‎server/Docs.slnx‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎server/src/Docs.Indexer/Docs.Indexer.csproj‎
Lines changed: 21 additions & 0 deletions b/‎server/src/Docs.Indexer/Docs.Indexer.csproj‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎server/src/Docs.Indexer/Indexers/BlogIndexer.cs‎
Lines changed: 102 additions & 0 deletions b/‎server/src/Docs.Indexer/Indexers/BlogIndexer.cs‎
Lines changed: 102 additions & 0 deletions
diff --git a/‎server/src/Docs.Indexer/Indexers/DocsIndexer.cs‎
Lines changed: 163 additions & 0 deletions b/‎server/src/Docs.Indexer/Indexers/DocsIndexer.cs‎
Lines changed: 163 additions & 0 deletions
@@ -3,6 +3,8 @@
     <Project Path="src/Docs.ServiceDefaults/Docs.ServiceDefaults.csproj"/>
     <Project Path="src/Docs.Web/Docs.Web.csproj"/>
     <Project Path="src/Docs.AppHost/Docs.AppHost.csproj"/>
+    <Project Path="src/Docs.Mcp/Docs.Mcp.csproj"/>
+    <Project Path="src/Docs.Indexer/Docs.Indexer.csproj"/>
   </Folder>
   <Folder Name="/tests/">
     <Project Path="tests/Docs.Web.Tests/Docs.Web.Tests.csproj"/>
 
@@ -0,0 +1,21 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net10.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="HtmlAgilityPack" Version="1.12.1" />
+    <PackageReference Include="Markdig" Version="0.41.0" />
+    <PackageReference Include="ReverseMarkdown" Version="4.6.0" />
+    <PackageReference Include="SimpleFeedReader" Version="2.0.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\Docs.Mcp\Docs.Mcp.csproj" />
+  </ItemGroup>
+
+</Project>
@@ -0,0 +1,102 @@
+// Copyright (c) Duende Software. All rights reserved.
+// See LICENSE in the project root for license information.
+
+using Docs.Mcp.Database;
+using HtmlAgilityPack;
+using Microsoft.EntityFrameworkCore;
+using ReverseMarkdown;
+using SimpleFeedReader;
+
+namespace Docs.Indexer.Indexers;
+
+/// <summary>
+/// Indexes blog articles from the Duende Software RSS feed.
+/// </summary>
+/// <remarks>
+/// Every qualifying feed item is downloaded over HTTP, its main content element is
+/// converted from HTML to Markdown, and one row is inserted into the
+/// <c>FTSBlogArticle</c> table through the supplied <see cref="McpDb"/>.
+/// </remarks>
+public sealed class BlogIndexer(McpDb db, HttpClient httpClient)
+{
+    private const string RssFeedUrl = "https://duendesoftware.com/rss.xml";
+
+    // Feed items published before this date are ignored.
+    private static readonly DateTime ReferenceDate = new(2024, 10, 01);
+
+    /// <summary>
+    /// Fetch and index blog articles from the RSS feed.
+    /// </summary>
+    /// <remarks>
+    /// Only items in the "blog" category published on or after the reference date are
+    /// considered. Items without a URI, and posts whose page has no recognizable content
+    /// element, are skipped and not counted. Any exception raised while indexing a post
+    /// is logged to the console and then rethrown, which stops the run.
+    /// </remarks>
+    /// <returns>A task that completes when all posts have been processed.</returns>
+    public async Task IndexAsync()
+    {
+        Console.WriteLine($"Fetching RSS feed: {RssFeedUrl}");
+
+        var reader = new FeedReader();
+        var items = await reader.RetrieveFeedAsync(RssFeedUrl);
+
+        // Filter to blog posts since the reference date
+        var blogItems = items
+            .Where(it => it.PublishDate >= ReferenceDate && it.Categories?.Contains("blog") == true)
+            .ToList();
+
+        Console.WriteLine($"Found {blogItems.Count} blog posts since {ReferenceDate:yyyy-MM-dd}");
+
+        var indexedCount = 0;
+        foreach (var item in blogItems)
+        {
+            if (item.Uri is null)
+            {
+                continue;
+            }
+
+            try
+            {
+                var wasIndexed = await IndexBlogPostAsync(item.Title ?? "Untitled", item.GetSummary(), item.Uri);
+
+                // Only report and count posts that actually produced a row; a post whose
+                // content section could not be located has already emitted a warning.
+                if (wasIndexed)
+                {
+                    indexedCount++;
+                    Console.WriteLine($"  Indexed: {item.Title}");
+                }
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine($"  Error indexing {item.Title}: {ex.Message}");
+                throw;
+            }
+        }
+
+        // NOTE(review): rows are written via ExecuteSqlRawAsync, so there are presumably no
+        // tracked changes to persist here - confirm before removing this call.
+        await db.SaveChangesAsync();
+        Console.WriteLine($"Indexed {indexedCount} blog articles");
+    }
+
+    /// <summary>
+    /// Downloads a single blog post, converts its content to Markdown and inserts it
+    /// into the <c>FTSBlogArticle</c> table.
+    /// </summary>
+    /// <param name="title">Title stored with the article.</param>
+    /// <param name="description">Optional summary that is prepended to the stored content.</param>
+    /// <param name="url">Address of the blog post's HTML page.</param>
+    /// <returns>
+    /// <c>true</c> when a row was inserted; <c>false</c> when the page contained none of
+    /// the expected content elements and the post was skipped.
+    /// </returns>
+    private async Task<bool> IndexBlogPostAsync(string title, string? description, Uri url)
+    {
+        // Fetch the HTML content
+        var htmlContent = await httpClient.GetStringAsync(url);
+
+        // Parse HTML and find content section
+        var htmlDocument = new HtmlDocument();
+        htmlDocument.LoadHtml(htmlContent);
+
+        // Try to find the main content section, from most to least specific selector
+        var content = htmlDocument.DocumentNode.SelectSingleNode("//section[@class='page-content alt markdown']")
+            ?? htmlDocument.DocumentNode.SelectSingleNode("//article")
+            ?? htmlDocument.DocumentNode.SelectSingleNode("//main");
+
+        if (content is null)
+        {
+            Console.WriteLine($"    Warning: Could not find content section for {title}");
+            return false;
+        }
+
+        // Convert HTML to Markdown
+        var converter = new Converter(new Config
+        {
+            GithubFlavored = true,
+            RemoveComments = true
+        });
+
+        var markdownContent = converter.Convert(content.InnerHtml);
+
+        // Combine description with content if available
+        var fullContent = !string.IsNullOrEmpty(description)
+            ? $"Summary: {description}\n\n---\n\n{markdownContent}"
+            : markdownContent;
+
+        // The {n} placeholders are turned into database parameters by EF Core; the values
+        // are never concatenated into the SQL text.
+        await db.Database.ExecuteSqlRawAsync(
+            "INSERT INTO FTSBlogArticle (Id, Title, Content) VALUES ({0}, {1}, {2})",
+            Guid.NewGuid().ToString(),
+            title,
+            fullContent);
+
+        return true;
+    }
+}
@@ -0,0 +1,163 @@
+// Copyright (c) Duende Software. All rights reserved.
+// See LICENSE in the project root for license information.
+
+using System.Text;
+using System.Text.RegularExpressions;
+using Docs.Mcp.Database;
+using Markdig.Syntax;
+using Markdig.Syntax.Inlines;
+using Microsoft.EntityFrameworkCore;
+
+namespace Docs.Indexer.Indexers;
+
+/// <summary>
+/// Indexes documentation articles from local llms.txt files.
+/// </summary>
+/// <remarks>
+/// Each article found is inserted as one row into the <c>FTSDocsArticle</c> table
+/// through the supplied <see cref="McpDb"/>.
+/// </remarks>
+public sealed partial class DocsIndexer(McpDb db)
+{
+    private const string SystemOpenTag = "<SYSTEM>";
+    private const string SystemCloseTag = "</SYSTEM>";
+
+    /// <summary>
+    /// Index documentation from the wwwroot directory.
+    /// Reads llms.txt and parses linked _llms-txt/*.txt files.
+    /// </summary>
+    /// <param name="wwwrootPath">Directory that contains <c>llms.txt</c> and the <c>_llms-txt</c> folder.</param>
+    /// <returns>A task that completes when all linked files have been processed.</returns>
+    /// <exception cref="InvalidOperationException">
+    /// <c>llms.txt</c> or the <c>_llms-txt</c> directory does not exist under <paramref name="wwwrootPath"/>.
+    /// </exception>
+    public async Task IndexAsync(string wwwrootPath)
+    {
+        var llmsTxtPath = Path.Combine(wwwrootPath, "llms.txt");
+        if (!File.Exists(llmsTxtPath))
+        {
+            throw new InvalidOperationException($"llms.txt not found at: {llmsTxtPath}");
+        }
+
+        Console.WriteLine($"Reading: {llmsTxtPath}");
+        var llmsTxt = await File.ReadAllTextAsync(llmsTxtPath);
+        var llmsMd = Markdig.Markdown.Parse(llmsTxt);
+
+        var llmsTxtDir = Path.Combine(wwwrootPath, "_llms-txt");
+        if (!Directory.Exists(llmsTxtDir))
+        {
+            throw new InvalidOperationException($"_llms-txt directory not found at: {llmsTxtDir}");
+        }
+
+        var totalArticles = 0;
+
+        // Find links to _llms-txt files
+        foreach (var link in llmsMd.Descendants<LinkInline>())
+        {
+            if (link.Url?.Contains("_llms-txt/", StringComparison.OrdinalIgnoreCase) != true)
+            {
+                continue;
+            }
+
+            // Extract filename from URL
+            var fileName = ExtractFileName(link.Url);
+            if (string.IsNullOrEmpty(fileName))
+            {
+                continue;
+            }
+
+            // The link is resolved against the local directory rather than fetched remotely
+            var filePath = Path.Combine(llmsTxtDir, fileName);
+            if (!File.Exists(filePath))
+            {
+                Console.WriteLine($"  Warning: File not found: {filePath}");
+                continue;
+            }
+
+            var articlesCount = await IndexDocumentFileAsync(filePath);
+            totalArticles += articlesCount;
+        }
+
+        // NOTE(review): rows are written via ExecuteSqlRawAsync, so there are presumably no
+        // tracked changes to persist here - confirm before removing this call.
+        await db.SaveChangesAsync();
+        Console.WriteLine($"Indexed {totalArticles} documentation articles");
+    }
+
+    /// <summary>
+    /// Splits one <c>_llms-txt</c> file into articles and inserts each of them.
+    /// </summary>
+    /// <param name="filePath">Full path of the file to process.</param>
+    /// <returns>The number of articles inserted from this file.</returns>
+    private async Task<int> IndexDocumentFileAsync(string filePath)
+    {
+        Console.WriteLine($"  Processing: {Path.GetFileName(filePath)}");
+        var content = await File.ReadAllTextAsync(filePath);
+
+        // Split on delimiter lines only. A plain substring split on "-----" would also cut
+        // articles apart at any run of dashes inside a line (for example the delimiter row of
+        // a Markdown table), and the headless remainder would then be discarded.
+        var sections = SectionDelimiterRegex().Split(content);
+        var articlesCount = 0;
+        string? productName = null;
+
+        foreach (var section in sections)
+        {
+            var trimmedSection = section.Trim();
+            if (string.IsNullOrEmpty(trimmedSection))
+            {
+                continue;
+            }
+
+            // Skip SYSTEM tags at the start
+            if (trimmedSection.StartsWith(SystemOpenTag, StringComparison.OrdinalIgnoreCase))
+            {
+                var endTag = trimmedSection.IndexOf(SystemCloseTag, StringComparison.OrdinalIgnoreCase);
+                if (endTag > 0)
+                {
+                    trimmedSection = trimmedSection[(endTag + SystemCloseTag.Length)..].Trim();
+                }
+            }
+
+            if (string.IsNullOrEmpty(trimmedSection))
+            {
+                continue;
+            }
+
+            // Extract title from first H1; sections without one are not indexed
+            var title = ExtractH1Title(trimmedSection);
+            if (string.IsNullOrEmpty(title))
+            {
+                continue;
+            }
+
+            // First H1 in the file becomes the product name
+            productName ??= title;
+
+            // Extract content (everything after the first line); a single-line section
+            // is stored as-is
+            var contentStart = trimmedSection.IndexOf('\n');
+            var articleContent = contentStart > 0 ? trimmedSection[(contentStart + 1)..].Trim() : trimmedSection;
+
+            // The {n} placeholders are turned into database parameters by EF Core; the
+            // values are never concatenated into the SQL text.
+            await db.Database.ExecuteSqlRawAsync(
+                "INSERT INTO FTSDocsArticle (Id, Product, Title, Content) VALUES ({0}, {1}, {2}, {3})",
+                Guid.NewGuid().ToString(),
+                productName,
+                title,
+                articleContent);
+
+            articlesCount++;
+        }
+
+        return articlesCount;
+    }
+
+    /// <summary>
+    /// Returns the <c>*.txt</c> file name that follows <c>_llms-txt/</c> at the end of
+    /// <paramref name="url"/>, or <c>null</c> when the URL does not end that way.
+    /// </summary>
+    private static string? ExtractFileName(string url)
+    {
+        // URL format: https://docs.duendesoftware.com/_llms-txt/access-token-management.txt
+        var match = LlmsTxtFileNameRegex().Match(url);
+        return match.Success ? match.Groups[1].Value : null;
+    }
+
+    /// <summary>
+    /// Returns the text of the first level-one heading in <paramref name="markdown"/>.
+    /// </summary>
+    /// <returns>
+    /// The heading text, or <c>null</c> when non-heading content appears before any
+    /// level-one heading or no such heading exists.
+    /// </returns>
+    private static string? ExtractH1Title(string markdown)
+    {
+        // Find first line starting with # (but not ##)
+        using var reader = new StringReader(markdown);
+        while (reader.ReadLine() is { } line)
+        {
+            var trimmed = line.TrimStart();
+            if (trimmed.StartsWith("# ", StringComparison.Ordinal))
+            {
+                return trimmed[2..].Trim();
+            }
+
+            // Skip empty lines, but stop at non-heading content
+            if (!string.IsNullOrWhiteSpace(trimmed) && !trimmed.StartsWith('#'))
+            {
+                break;
+            }
+        }
+
+        return null;
+    }
+
+    [GeneratedRegex(@"_llms-txt/([^/]+\.txt)$", RegexOptions.IgnoreCase)]
+    private static partial Regex LlmsTxtFileNameRegex();
+
+    // Matches a whole line consisting only of five or more dashes (optionally surrounded by
+    // spaces/tabs); the optional \r keeps CRLF files working because $ matches before \n.
+    [GeneratedRegex(@"^[ \t]*-{5,}[ \t]*\r?$", RegexOptions.Multiline)]
+    private static partial Regex SectionDelimiterRegex();
+}