Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/convert-and-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@ jobs:
New-Item -Path $publish -ItemType Directory -Force | Out-Null
Get-ChildItem -LiteralPath $converted -Directory | ForEach-Object {
$name = $_.Name
$md = Join-Path $_.FullName 'index.md'
$md = Join-Path $_.FullName "$name.md"
if (-not (Test-Path -LiteralPath $md)) { $md = Join-Path $_.FullName 'index.md' }
if (-not (Test-Path -LiteralPath $md)) { return }
$dest = Join-Path $publish $name
New-Item -Path $dest -ItemType Directory -Force | Out-Null
Copy-Item -LiteralPath $md -Destination $dest -Force
Copy-Item -LiteralPath $md -Destination (Join-Path $dest 'index.md') -Force
$media = Join-Path $_.FullName 'media'
if (Test-Path -LiteralPath $media -PathType Container) {
Copy-Item -LiteralPath $media -Destination $dest -Recurse -Force
Expand Down
3 changes: 2 additions & 1 deletion AwakeCoding.OpenSpecs/AwakeCoding.OpenSpecs.psd1
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
'Invoke-OpenSpecConversionPipeline',
'Get-OpenSpecConversionReport',
'Test-OpenSpecMarkdownFidelity',
'Update-OpenSpecIndex'
'Update-OpenSpecIndex',
'Compare-OpenSpecToLiveHtml'
)
CmdletsToExport = @()
VariablesToExport = @()
Expand Down
144 changes: 144 additions & 0 deletions AwakeCoding.OpenSpecs/Public/Compare-OpenSpecToLiveHtml.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
function Compare-OpenSpecToLiveHtml {
    <#
    .SYNOPSIS
        Compares converted markdown structure to the live HTML spec page on learn.microsoft.com.
    .DESCRIPTION
        For each report returned by Get-OpenSpecConversionReport, reads the converted
        markdown and collects its headings and '<a id="..."></a>' anchors. It then looks
        the protocol up in the catalog (Get-OpenSpecCatalog), fetches the catalog's
        SpecPageUrl through Invoke-OpenSpecRequest and collects the <h1>-<h6> headings
        and their id attributes. The two anchor sets are compared case-insensitively and
        the differences are reported, together with a flag suggesting manual review.
    .PARAMETER OutputPath
        Root directory that holds the converted specs. Defaults to 'converted-specs'
        under the current location. An error is thrown when the path does not exist.
    .PARAMETER ProtocolId
        Optional protocol ids; passed through unchanged to Get-OpenSpecConversionReport.
    .OUTPUTS
        One 'AwakeCoding.OpenSpecs.LiveHtmlCompareResult' object per processed report.
        Success and failure results expose the same set of properties.
    .EXAMPLE
        Compare-OpenSpecToLiveHtml -OutputPath .\converted-specs |
            Where-Object SuggestManualReview
    #>
    [CmdletBinding()]
    param(
        [string]$OutputPath = (Join-Path -Path (Get-Location) -ChildPath 'converted-specs'),

        [string[]]$ProtocolId
    )

    if (-not (Test-Path -LiteralPath $OutputPath)) {
        throw "Output path '$OutputPath' was not found."
    }

    # Catalog lookup keyed by protocol id, plus an alias with '-' replaced by '_'.
    # A catalog failure is not fatal: every spec is then reported as "not found in catalog".
    $catalog = @{}
    try {
        foreach ($e in (Get-OpenSpecCatalog)) {
            $catalog[$e.ProtocolId] = $e
            $norm = $e.ProtocolId -replace '-', '_'
            if (-not $catalog[$norm]) { $catalog[$norm] = $e }
        }
    }
    catch {
        Write-Warning "Could not fetch catalog: $($_.Exception.Message)"
    }

    $reports = Get-OpenSpecConversionReport -OutputPath $OutputPath -ProtocolId $ProtocolId
    $results = New-Object System.Collections.Generic.List[object]

    # Loop-invariant regexes, built once.
    $mdHeadingRegex = [regex]::new('(?m)^(#{1,6})\s+(.+)$')
    $mdAnchorRegex = [regex]::new('<a\s+id="([^"]+)"\s*>\s*</a>')
    # Group 1 = level, group 2 = raw attribute text (may be empty), group 3 = heading text.
    $liveHeadingRegex = [regex]::new('(?is)<h([1-6])(\s[^>]*)?>([^<]*)</h\1>')
    $idAttrRegex = [regex]::new('(?i)\sid="([^"]+)"')

    foreach ($report in $reports) {
        # NOTE: PowerShell variable names are case-insensitive, so a local called
        # $protocolId would be the [string[]]-constrained $ProtocolId parameter and
        # every assigned value would silently become an array. Use a distinct name.
        $specId = $null
        $mdPath = $null
        $liveUrl = $null

        try {
            $specId = [string]$report.ProtocolId
            if ([string]::IsNullOrWhiteSpace($specId)) { continue }

            $mdPath = $report.MarkdownPath
            if ([string]::IsNullOrWhiteSpace($mdPath)) {
                $mdPath = Join-Path (Join-Path $OutputPath $specId) "$specId.md"
            }

            $markdown = ''
            if ($mdPath -and (Test-Path -LiteralPath $mdPath -PathType Leaf -ErrorAction SilentlyContinue)) {
                $markdown = Get-Content -LiteralPath $mdPath -Raw -ErrorAction SilentlyContinue
            }
            # Get-Content -Raw yields $null for an empty or unreadable file.
            if ($null -eq $markdown) { $markdown = '' }

            $mdHeadings = New-Object System.Collections.Generic.List[object]
            $mdAnchors = New-Object System.Collections.Generic.HashSet[string]([System.StringComparer]::OrdinalIgnoreCase)
            foreach ($m in $mdHeadingRegex.Matches($markdown)) {
                [void]$mdHeadings.Add([pscustomobject]@{
                    Level = $m.Groups[1].Value.Length
                    Text  = $m.Groups[2].Value.Trim()
                })
            }
            foreach ($m in $mdAnchorRegex.Matches($markdown)) {
                [void]$mdAnchors.Add($m.Groups[1].Value)
            }

            $liveHeadings = New-Object System.Collections.Generic.List[object]
            $liveAnchors = New-Object System.Collections.Generic.HashSet[string]([System.StringComparer]::OrdinalIgnoreCase)
            $fetchError = $null

            $entry = $catalog[$specId]
            if ($entry) {
                $liveUrl = $entry.SpecPageUrl
                try {
                    $response = Invoke-OpenSpecRequest -Uri $liveUrl
                    $html = [string]$response.Content

                    # Single pass over all headings, so each heading is counted exactly
                    # once whether or not it carries an id attribute.
                    foreach ($m in $liveHeadingRegex.Matches($html)) {
                        $level = [int]$m.Groups[1].Value

                        $rawText = $m.Groups[3].Value
                        $text = ''
                        if (-not [string]::IsNullOrWhiteSpace($rawText)) {
                            $text = ([string](ConvertFrom-OpenSpecHtml -Html $rawText)).Trim()
                        }

                        $id = $null
                        $idMatch = $idAttrRegex.Match($m.Groups[2].Value)
                        if ($idMatch.Success) { $id = $idMatch.Groups[1].Value }

                        if ($id) { [void]$liveAnchors.Add($id) }

                        # Headings with an id are always kept; id-less ones only when they have text.
                        if ($id -or $text.Length -gt 0) {
                            [void]$liveHeadings.Add([pscustomobject]@{ Level = $level; Id = $id; Text = $text })
                        }
                    }
                }
                catch {
                    $fetchError = $_.Exception.Message
                }
            }
            else {
                $fetchError = "Protocol not found in catalog"
            }

            $missingInMd = New-Object System.Collections.Generic.List[string]
            $missingInLive = New-Object System.Collections.Generic.List[string]

            # Markdown anchors absent from the live page. '_Toc<digits>' anchors are
            # ignored, and nothing is reported when the live page yielded no anchors.
            foreach ($aid in @($mdAnchors)) {
                if ($aid -notmatch '^_Toc\d+$' -and $liveAnchors.Count -gt 0 -and -not $liveAnchors.Contains($aid)) {
                    [void]$missingInLive.Add($aid)
                }
            }
            # Live anchors absent from the markdown.
            foreach ($aid in @($liveAnchors)) {
                if (-not $mdAnchors.Contains($aid)) {
                    [void]$missingInMd.Add($aid)
                }
            }

            $suggestReview = $false
            if ($fetchError) { $suggestReview = $true }
            if ($missingInMd.Count -gt 5 -or $missingInLive.Count -gt 5) { $suggestReview = $true }
            if ($liveHeadings.Count -eq 0 -and $mdHeadings.Count -gt 0) { $suggestReview = $true }

            [void]$results.Add([pscustomobject]@{
                PSTypeName           = 'AwakeCoding.OpenSpecs.LiveHtmlCompareResult'
                ProtocolId           = $specId
                MarkdownPath         = $mdPath
                LiveUrl              = $liveUrl
                FetchError           = $fetchError
                MarkdownHeadingCount = $mdHeadings.Count
                MarkdownAnchorCount  = $mdAnchors.Count
                LiveHeadingCount     = $liveHeadings.Count
                LiveAnchorCount      = $liveAnchors.Count
                MissingInMarkdown    = @($missingInMd)
                MissingInLive        = @($missingInLive)
                SuggestManualReview  = $suggestReview
                IssueCount           = $report.IssueCount
            })
        }
        catch {
            Write-Warning "Compare failed for $($report.ProtocolId): $_"
            # Same property set as the success path so downstream formatting,
            # filtering and export see a uniform object shape.
            [void]$results.Add([pscustomobject]@{
                PSTypeName           = 'AwakeCoding.OpenSpecs.LiveHtmlCompareResult'
                ProtocolId           = $report.ProtocolId
                MarkdownPath         = $mdPath
                LiveUrl              = $liveUrl
                FetchError           = $_.Exception.Message
                MarkdownHeadingCount = 0
                MarkdownAnchorCount  = 0
                LiveHeadingCount     = 0
                LiveAnchorCount      = 0
                MissingInMarkdown    = @()
                MissingInLive        = @()
                SuggestManualReview  = $true
                IssueCount           = $report.IssueCount
            })
        }
    }

    $results
}
9 changes: 9 additions & 0 deletions AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ function Get-OpenSpecCatalog {
'(?is)<a\b[^>]*href\s*=\s*["''](?<href>\.\./(?<slug>(?:ms|mc)-[a-z0-9-]+)/(?<guid>[0-9a-f-]{36}))(?:["''][^>]*)?>(?<text>.*?)</a>'
)
$idRegex = [regex]::new('\[(?<id>(?:MS|MC)-[A-Z0-9-]+)\]', 'IgnoreCase')
$cellRegex = [regex]::new('(?is)<td[^>]*>(?<content>.*?)</td>')

$seen = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
$entries = New-Object System.Collections.Generic.List[object]
Expand Down Expand Up @@ -41,10 +42,17 @@ function Get-OpenSpecCatalog {
$title = $protocolId
}

$description = ''
$cells = [regex]::Matches($rowHtml, $cellRegex)
if ($cells.Count -ge 2) {
$description = (ConvertFrom-OpenSpecHtml -Html $cells[1].Groups['content'].Value).Trim()
}

$entries.Add([pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
ProtocolId = $protocolId
Title = $title
Description = $description
SpecPageUrl = $specPageUrl
Slug = $slug
SourcePage = $Uri
Expand All @@ -63,6 +71,7 @@ function Get-OpenSpecCatalog {
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
ProtocolId = $protocolId
Title = $protocolId
Description = ''
SpecPageUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($protocolId.ToLowerInvariant())"
Slug = $protocolId.ToLowerInvariant()
SourcePage = $Uri
Expand Down
90 changes: 71 additions & 19 deletions AwakeCoding.OpenSpecs/Public/Update-OpenSpecIndex.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,35 @@ function Update-OpenSpecIndex {
[CmdletBinding()]
param(
[Parameter(Mandatory)]
[string]$Path
[string]$Path,

[switch]$UseCatalogTitles = $true,

[switch]$IncludeDescription = $false
)

if (-not (Test-Path -LiteralPath $Path)) {
throw "Output directory not found: $Path"
}

$catalogMap = @{}
if ($UseCatalogTitles) {
try {
$catalog = @(Get-OpenSpecCatalog)
foreach ($entry in $catalog) {
$catalogMap[$entry.ProtocolId] = $entry
$normalized = $entry.ProtocolId -replace '-', '_'
if (-not $catalogMap[$normalized]) { $catalogMap[$normalized] = $entry }
}
}
catch {
Write-Warning "Could not fetch catalog for titles: $($_.Exception.Message). Using markdown fallback."
}
}

$specDirs = Get-ChildItem -LiteralPath $Path -Directory | Sort-Object Name
$entries = New-Object System.Collections.Generic.List[pscustomobject]
$boilerplatePatterns = @('Intellectual Property Rights', 'Open Specifications Documentation')

foreach ($dir in $specDirs) {
$specName = $dir.Name
Expand All @@ -25,28 +45,50 @@ function Update-OpenSpecIndex {
}

$mdFileName = [System.IO.Path]::GetFileName($mdFile)

# Extract the title from line 3 of the markdown.
# Expected format:
# Line 1: **[MS-RDPECLIP]:**
# Line 2: (blank)
# Line 3: **Remote Desktop Protocol: Clipboard Virtual Channel Extension**
$lines = Get-Content -LiteralPath $mdFile -TotalCount 5 -ErrorAction SilentlyContinue
$title = ''
if ($lines -and $lines.Count -ge 3) {
$rawTitle = $lines[2]
$title = $rawTitle -replace '^\*\*(.+)\*\*$', '$1'
$title = $title.Trim()
$description = ''

$catalogEntry = $catalogMap[$specName]
if ($catalogEntry) {
$title = $catalogEntry.Title
if ($IncludeDescription -and $catalogEntry.Description) {
$description = $catalogEntry.Description
}
}

if ([string]::IsNullOrWhiteSpace($title)) {
$lines = Get-Content -LiteralPath $mdFile -TotalCount 30 -ErrorAction SilentlyContinue
$protocolLabelRegex = [regex]::new('^\*\*\[?(?:MS|MC)-[A-Z0-9-]+\]?\s*:\s*\*\*$', 'IgnoreCase')
$boldLineRegex = [regex]::new('^\*\*(.+)\*\*$')
$foundLabel = $false
foreach ($line in $lines) {
if ($protocolLabelRegex.IsMatch($line.Trim())) {
$foundLabel = $true
continue
}
if ($foundLabel -and $boldLineRegex.IsMatch($line.Trim())) {
$candidate = ($line -replace '^\*\*(.+)\*\*$', '$1').Trim()
$isBoilerplate = $false
foreach ($pat in $boilerplatePatterns) {
if ($candidate -like "*$pat*") { $isBoilerplate = $true; break }
}
if (-not $isBoilerplate -and $candidate.Length -gt 2) {
$title = $candidate
break
}
}
}
}

if ([string]::IsNullOrWhiteSpace($title)) {
$title = $specName
}

[void]$entries.Add([pscustomobject]@{
Name = $specName
Title = $title
Link = "$specName/$mdFileName"
Name = $specName
Title = $title
Description = $description
Link = "$specName/$mdFileName"
})
}

Expand All @@ -55,11 +97,21 @@ function Update-OpenSpecIndex {
[void]$sb.AppendLine()
[void]$sb.AppendLine("$($entries.Count) protocol specifications converted to Markdown.")
[void]$sb.AppendLine()
[void]$sb.AppendLine('| Protocol | Title |')
[void]$sb.AppendLine('|---|---|')

foreach ($entry in $entries) {
[void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) |")
if ($IncludeDescription) {
[void]$sb.AppendLine('| Protocol | Title | Description |')
[void]$sb.AppendLine('|---|---|---|')
foreach ($entry in $entries) {
$descEscaped = ($entry.Description -replace '\|', ', ' -replace '\r?\n', ' ').Trim()
[void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) | $descEscaped |")
}
}
else {
[void]$sb.AppendLine('| Protocol | Title |')
[void]$sb.AppendLine('|---|---|')
foreach ($entry in $entries) {
[void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) |")
}
}

$readmePath = Join-Path -Path $Path -ChildPath 'README.md'
Expand Down
Loading