Skip to content

Commit d0da620

Browse files
Remove learn.microsoft.com links from README section headers (#6)
* Add final publish PNG optimization with oxipng
* Install oxipng from GitHub release binary
* Fix oxipng PATH in workflow install step
* Use skills/windows-protocols as default output, rename zip to windows-protocols.zip
  - Build-Publish.ps1: default PublishPath to skills/windows-protocols, ZipPath to windows-protocols.zip
  - Workflow: build into skills/windows-protocols, push from that folder, artifact names updated
  - .gitignore: ignore generated skills/windows-protocols/MS-*/
  - README: add Converted Specifications Index section with 3-table overview
  Co-authored-by: Cursor <cursoragent@cursor.com>
* Stop tracking generated README.md, improve index section links
  - Delete skills/windows-protocols/README.md (generated by build, not for commit)
  - Add skills/windows-protocols/README.md to .gitignore
  - Update-OpenSpecIndex: use markdown links for each section (Overview, Technical, Reference) to Learn pages
  Co-authored-by: Cursor <cursoragent@cursor.com>
* Fix missing MS-MQOD/MS-THCH downloads: retry pass + lower CI throttle
  Co-authored-by: Cursor <cursoragent@cursor.com>
* Add DOCX download fallback via RSS for stale links (MS-THCH, MS-MQOD)
  - Add Get-OpenSpecDocxFallbackUrls to fetch fallback URLs from protocol RSS feed
  - Save-OpenSpecDocument retries failed DOCX with Azure FD/blob URLs from RSS
  - Handles UTF-8 BOM in RSS response for correct XML parsing
  - Fixes protocols where Learn-page links have stale dates (404)
  - Include workflow, build, LEGAL, and test script updates
  Co-authored-by: Cursor <cursoragent@cursor.com>
* create initial skill
* Remove learn.microsoft.com links from README section headers
  Co-authored-by: Cursor <cursoragent@cursor.com>
---------
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent ea08ad4 commit d0da620

File tree

2,879 files changed

+1977279
-105
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,879 files changed

+1977279
-105
lines changed

.github/workflows/convert-and-publish.yml

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,55 @@ jobs:
1717
- name: Checkout repository
1818
uses: actions/checkout@v4
1919

20+
- name: Install oxipng
21+
shell: pwsh
22+
run: |
23+
$release = Invoke-RestMethod -Uri 'https://api.github.com/repos/oxipng/oxipng/releases/latest'
24+
$asset = $release.assets | Where-Object { $_.name -match 'x86_64-pc-windows-msvc\.zip$' } | Select-Object -First 1
25+
if (-not $asset) {
26+
throw 'Could not find Windows x86_64 zip asset in latest oxipng release.'
27+
}
28+
$zipPath = Join-Path $env:RUNNER_TEMP $asset.name
29+
Invoke-WebRequest -Uri $asset.browser_download_url -OutFile $zipPath
30+
$extractPath = Join-Path $env:RUNNER_TEMP 'oxipng'
31+
if (Test-Path -LiteralPath $extractPath) {
32+
Remove-Item -LiteralPath $extractPath -Recurse -Force
33+
}
34+
Expand-Archive -LiteralPath $zipPath -DestinationPath $extractPath -Force
35+
$binPath = Get-ChildItem -LiteralPath $extractPath -Recurse -File -Filter 'oxipng.exe' | Select-Object -First 1
36+
if (-not $binPath) {
37+
throw 'oxipng.exe was not found after extracting release archive.'
38+
}
39+
$binDir = Split-Path -Path $binPath.FullName -Parent
40+
$env:PATH = "$binDir;$env:PATH"
41+
Add-Content -Path $env:GITHUB_PATH -Value $binDir
42+
& $binPath.FullName --version
43+
2044
- name: Install OpenXML module
2145
shell: pwsh
2246
run: |
2347
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
2448
Install-Module -Name OpenXML -Force -Scope CurrentUser
2549
26-
- name: Build publish tree and Windows_Protocols.zip
50+
- name: Build publish tree and windows-protocols.zip
2751
shell: pwsh
2852
working-directory: ${{ github.workspace }}
29-
run: .\scripts\Build-Publish.ps1
53+
run: .\scripts\Build-Publish.ps1 -ThrottleLimit 4 -AllowPartial
3054

3155
- name: Upload publish artifact
3256
uses: actions/upload-artifact@v4
3357
with:
3458
name: publish
35-
path: Windows_Protocols.zip
59+
path: windows-protocols.zip
60+
61+
- name: Stage downloadable bundle in publish tree
62+
shell: pwsh
63+
working-directory: ${{ github.workspace }}
64+
run: Copy-Item -LiteralPath .\windows-protocols.zip -Destination .\skills\windows-protocols\windows-protocols.zip -Force
3665

3766
- name: Push to orphaned publish branch
3867
shell: pwsh
39-
working-directory: publish
68+
working-directory: skills/windows-protocols
4069
env:
4170
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
4271
run: |

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ artifacts/
22
downloads*/
33
converted*/
44
reports*/
5-
publish/
5+
publish/
6+
windows-protocols.zip
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
# Returns fallback DOCX URLs for a protocol by fetching its RSS feed.
# Used when the primary Learn-page link fails (e.g. stale dates on MS-THCH, MS-MQOD).
# Azure Front Door works when blob storage returns 404; we try it first, then blob.
function Get-OpenSpecDocxFallbackUrls {
    <#
    .SYNOPSIS
    Gets alternative DOCX download URLs for a protocol from its RSS feed.

    .DESCRIPTION
    Fetches "<rssBase>/<ProtocolId>/[<ProtocolId>].rss" (brackets URL-encoded), looks
    for the first <item> whose description contains an href ending in .docx, and
    returns up to two candidate URLs for it, in order of preference:
      1. an Azure Front Door URL rebuilt from the 6-digit date suffix of that href;
      2. the href itself, when it points at winprotocoldoc.blob.core.windows.net.
    The function is best-effort: any fetch or parse failure is reported through
    Write-Verbose and produces no output rather than an error.

    .PARAMETER ProtocolId
    Protocol identifier without brackets, e.g. MS-THCH.

    .OUTPUTS
    System.String. Zero, one or two URLs.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$ProtocolId
    )

    $rssBase = 'https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net'
    $encodedBrackets = [System.Uri]::EscapeDataString("[$ProtocolId]")
    $rssUrl = "$rssBase/$ProtocolId/${encodedBrackets}.rss"

    try {
        # -UseBasicParsing keeps Windows PowerShell 5.1 from invoking the Internet
        # Explorer engine; PowerShell 6+ accepts the switch and ignores it.
        $response = Invoke-WebRequest -Uri $rssUrl -UseBasicParsing -MaximumRedirection 8 -TimeoutSec 30 -ErrorAction Stop
    }
    catch {
        Write-Verbose "RSS fetch failed for $ProtocolId : $($_.Exception.Message)"
        return @()
    }

    $content = $response.Content
    if ($content -is [byte[]]) {
        $content = [System.Text.Encoding]::UTF8.GetString($content)
    }
    # Strip BOM if present (can break XML parsing)
    if ($content.Length -gt 0 -and [int][char]$content[0] -eq 0xFEFF) {
        $content = $content.Substring(1)
    }

    try {
        [xml]$xml = $content
    }
    catch {
        Write-Verbose "RSS parse failed for $ProtocolId : $($_.Exception.Message)"
        return @()
    }

    $items = $xml.SelectNodes('//item')
    if (-not $items -or $items.Count -eq 0) {
        return @()
    }

    $ignoreCase = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase
    $fallbacks = [System.Collections.Generic.List[string]]::new()
    foreach ($item in $items) {
        # Read InnerText instead of the adapted .description property: the property
        # yields an XmlElement (not a string) when the text is wrapped in CDATA.
        $descNode = $item.SelectSingleNode('description')
        $desc = if ($descNode) { $descNode.InnerText } else { $null }
        if (-not $desc) { continue }

        # Match DOCX href (blob or azurefd), typically .../[MS-XYZ]-YYMMDD.docx
        $docxMatch = [regex]::Match($desc, 'href="([^"]+\.docx)(?:\?[^"]*)?"', $ignoreCase)
        if (-not $docxMatch.Success) { continue }

        $url = [System.Net.WebUtility]::HtmlDecode($docxMatch.Groups[1].Value)
        if (-not $url -or $url -notmatch '\.docx') { continue }

        # Extract date suffix (YYMMDD) for Azure FD URL construction.
        # Case-insensitive so it agrees with the href match above (e.g. ".DOCX").
        $dateMatch = [regex]::Match($url, '-(\d{6})\.docx', $ignoreCase)
        $dateSuffix = if ($dateMatch.Success) { $dateMatch.Groups[1].Value } else { $null }

        # Azure Front Door works when blob returns 404; add first
        if ($dateSuffix) {
            $azureFdUrl = "$rssBase/$ProtocolId/${encodedBrackets}-$dateSuffix.docx"
            if ($azureFdUrl -notin $fallbacks) {
                [void]$fallbacks.Add($azureFdUrl)
            }
        }

        # Include original blob URL as fallback
        if ($url -match 'winprotocoldoc\.blob\.core\.windows\.net' -and $url -notin $fallbacks) {
            [void]$fallbacks.Add($url)
        }

        # First item is latest version; we have enough fallbacks
        if ($fallbacks.Count -gt 0) { break }
    }

    return @($fallbacks)
}

AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1

Lines changed: 73 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
$script:OpenSpecReferenceDocsUri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/1593dc07-6116-4e9e-8aeb-85c7438fab0a'
2+
$script:OpenSpecOverviewDocsUri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/4a1806f9-2979-491d-af3c-f82ed0a4c1ba'
23

34
# Reference specs (MS-DTYP, MS-ERREF, MS-LCID, MS-UCODEREF) from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/1593dc07-6116-4e9e-8aeb-85c7438fab0a
45
$script:OpenSpecReferenceSpecs = @(
@@ -12,12 +13,10 @@ function Get-OpenSpecCatalog {
1213
[CmdletBinding()]
1314
param(
1415
[string]$Uri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/MS-WINPROTLP/e36c976a-6263-42a8-b119-7a3cc41ddd2a',
15-
[switch]$IncludeReferenceSpecs
16+
[switch]$IncludeReferenceSpecs,
17+
[switch]$IncludeOverviewDocs
1618
)
1719

18-
$response = Invoke-OpenSpecRequest -Uri $Uri
19-
$html = $response.Content
20-
2120
$rowRegex = [regex]::new('(?is)<tr[^>]*>(?<row>.*?)</tr>')
2221
$specLinkRegex = [regex]::new(
2322
'(?is)<a\b[^>]*href\s*=\s*["''](?<href>\.\./(?<slug>(?:ms|mc)-[a-z0-9-]+)/(?<guid>[0-9a-f-]{36}))(?:["''][^>]*)?>(?<text>.*?)</a>'
@@ -28,66 +27,88 @@ function Get-OpenSpecCatalog {
2827
$seen = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
2928
$entries = New-Object System.Collections.Generic.List[object]
3029

31-
foreach ($rowMatch in $rowRegex.Matches($html)) {
32-
$rowHtml = $rowMatch.Groups['row'].Value
33-
$linkMatch = $specLinkRegex.Match($rowHtml)
34-
if (-not $linkMatch.Success) {
35-
continue
36-
}
30+
$addFromPage = {
31+
param(
32+
[string]$SourceUri,
33+
[switch]$AllowFallback
34+
)
3735

38-
$labelText = ConvertFrom-OpenSpecHtml -Html $linkMatch.Groups['text'].Value
39-
$idMatch = $idRegex.Match($labelText)
40-
if (-not $idMatch.Success) {
41-
continue
42-
}
36+
$response = Invoke-OpenSpecRequest -Uri $SourceUri
37+
$html = $response.Content
38+
$addedCount = 0
4339

44-
$protocolId = $idMatch.Groups['id'].Value.ToUpperInvariant()
45-
if (-not $seen.Add($protocolId)) {
46-
continue
47-
}
40+
foreach ($rowMatch in $rowRegex.Matches($html)) {
41+
$rowHtml = $rowMatch.Groups['row'].Value
42+
$linkMatch = $specLinkRegex.Match($rowHtml)
43+
if (-not $linkMatch.Success) {
44+
continue
45+
}
4846

49-
$slug = $linkMatch.Groups['slug'].Value.ToLowerInvariant()
50-
$specPageUrl = Resolve-OpenSpecAbsoluteUrl -BaseUrl $Uri -RelativeOrAbsoluteUrl ([System.Net.WebUtility]::HtmlDecode($linkMatch.Groups['href'].Value))
51-
$title = ($labelText -replace '^\s*\[(?:MS|MC)-[A-Z0-9-]+\]\s*:\s*', '').Trim()
52-
if ([string]::IsNullOrWhiteSpace($title)) {
53-
$title = $protocolId
54-
}
47+
$labelText = ConvertFrom-OpenSpecHtml -Html $linkMatch.Groups['text'].Value
48+
$idMatch = $idRegex.Match($labelText)
49+
if (-not $idMatch.Success) {
50+
continue
51+
}
5552

56-
$description = ''
57-
$cells = [regex]::Matches($rowHtml, $cellRegex)
58-
if ($cells.Count -ge 2) {
59-
$description = (ConvertFrom-OpenSpecHtml -Html $cells[1].Groups['content'].Value).Trim()
60-
}
53+
$protocolId = $idMatch.Groups['id'].Value.ToUpperInvariant()
54+
if (-not $seen.Add($protocolId)) {
55+
continue
56+
}
6157

62-
$entries.Add([pscustomobject]@{
63-
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
64-
ProtocolId = $protocolId
65-
Title = $title
66-
Description = $description
67-
SpecPageUrl = $specPageUrl
68-
Slug = $slug
69-
SourcePage = $Uri
70-
})
71-
}
58+
$slug = $linkMatch.Groups['slug'].Value.ToLowerInvariant()
59+
$specPageUrl = Resolve-OpenSpecAbsoluteUrl -BaseUrl $SourceUri -RelativeOrAbsoluteUrl ([System.Net.WebUtility]::HtmlDecode($linkMatch.Groups['href'].Value))
60+
$title = ($labelText -replace '^\s*\[(?:MS|MC)-[A-Z0-9-]+\]\s*:\s*', '').Trim()
61+
if ([string]::IsNullOrWhiteSpace($title)) {
62+
$title = $protocolId
63+
}
7264

73-
if ($entries.Count -eq 0) {
74-
$protocolPattern = '\[(?<id>(?:MS|MC)-[A-Z0-9-]+)\]'
75-
$idMatches = [regex]::Matches($html, $protocolPattern, 'IgnoreCase')
76-
$protocolIds = $idMatches |
77-
ForEach-Object { $_.Groups['id'].Value.ToUpperInvariant() } |
78-
Sort-Object -Unique
65+
$description = ''
66+
$cells = [regex]::Matches($rowHtml, $cellRegex)
67+
if ($cells.Count -ge 2) {
68+
$description = (ConvertFrom-OpenSpecHtml -Html $cells[1].Groups['content'].Value).Trim()
69+
}
7970

80-
foreach ($protocolId in $protocolIds) {
8171
$entries.Add([pscustomobject]@{
8272
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
8373
ProtocolId = $protocolId
84-
Title = $protocolId
85-
Description = ''
86-
SpecPageUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($protocolId.ToLowerInvariant())"
87-
Slug = $protocolId.ToLowerInvariant()
88-
SourcePage = $Uri
74+
Title = $title
75+
Description = $description
76+
SpecPageUrl = $specPageUrl
77+
Slug = $slug
78+
SourcePage = $SourceUri
8979
})
80+
$addedCount++
9081
}
82+
83+
if ($AllowFallback -and $addedCount -eq 0) {
84+
$protocolPattern = '\[(?<id>(?:MS|MC)-[A-Z0-9-]+)\]'
85+
$idMatches = [regex]::Matches($html, $protocolPattern, 'IgnoreCase')
86+
$protocolIds = $idMatches |
87+
ForEach-Object { $_.Groups['id'].Value.ToUpperInvariant() } |
88+
Sort-Object -Unique
89+
90+
foreach ($protocolId in $protocolIds) {
91+
if (-not $seen.Add($protocolId)) {
92+
continue
93+
}
94+
95+
$entries.Add([pscustomobject]@{
96+
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
97+
ProtocolId = $protocolId
98+
Title = $protocolId
99+
Description = ''
100+
SpecPageUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($protocolId.ToLowerInvariant())"
101+
Slug = $protocolId.ToLowerInvariant()
102+
SourcePage = $SourceUri
103+
})
104+
}
105+
}
106+
}
107+
108+
& $addFromPage -SourceUri $Uri -AllowFallback
109+
110+
if ($IncludeOverviewDocs) {
111+
& $addFromPage -SourceUri $script:OpenSpecOverviewDocsUri
91112
}
92113

93114
if ($IncludeReferenceSpecs) {

AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ function Save-OpenSpecDocument {
8484
}
8585

8686
$toDownload = [System.Collections.Generic.List[object]]::new()
87+
$existsResults = [System.Collections.Generic.List[object]]::new()
8788
foreach ($link in $links) {
8889
$fileName = $link.FileName
8990
if ([string]::IsNullOrWhiteSpace($fileName)) {
@@ -93,15 +94,15 @@ function Save-OpenSpecDocument {
9394
$destination = Join-Path -Path $OutputPath -ChildPath $fileName
9495

9596
if ((Test-Path -LiteralPath $destination) -and -not $Force) {
96-
[pscustomobject]@{
97+
[void]$existsResults.Add([pscustomobject]@{
9798
PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
9899
ProtocolId = $link.ProtocolId
99100
Format = $link.Format
100101
Url = $link.Url
101102
Path = $destination
102103
Status = 'Exists'
103104
Size = (Get-Item -LiteralPath $destination).Length
104-
}
105+
})
105106
continue
106107
}
107108

@@ -157,8 +158,31 @@ function Save-OpenSpecDocument {
157158
}
158159
}
159160

# Retries a failed DOCX download using the fallback URLs returned by
# Get-OpenSpecDocxFallbackUrls.
#   $result      - download result object; only Status 'Failed' + Format 'DOCX'
#                  with a non-empty ProtocolId is retried, anything else is
#                  returned unchanged.
#   $destination - final file path for the document.
# Returns a new 'Downloaded' result for the first fallback URL that succeeds,
# otherwise the original $result.
$tryDocxFallback = {
    param($result, $destination)

    if ($result.Status -ne 'Failed' -or $result.Format -ne 'DOCX' -or -not $result.ProtocolId) { return $result }

    # Download next to the target and move into place only on success, so a failed
    # or interrupted attempt never leaves a truncated file at $destination that a
    # later run would report as 'Exists'.
    $partialPath = "$destination.partial"

    $fallbacks = Get-OpenSpecDocxFallbackUrls -ProtocolId $result.ProtocolId
    foreach ($url in $fallbacks) {
        # Skip the URL that already failed.
        if ($url -eq $result.Url) { continue }
        try {
            Invoke-WebRequest -Uri $url -OutFile $partialPath -MaximumRedirection 8 -ErrorAction Stop
            Move-Item -LiteralPath $partialPath -Destination $destination -Force -ErrorAction Stop
            return [pscustomobject]@{
                PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
                ProtocolId = $result.ProtocolId
                Format = $result.Format
                Url = $url
                Path = $destination
                Status = 'Downloaded'
                Size = (Get-Item -LiteralPath $destination).Length
            }
        }
        catch {
            # Best effort: record why this candidate failed, clean up, try the next one.
            Write-Verbose "DOCX fallback failed for $($result.ProtocolId) from $url : $($_.Exception.Message)"
            if (Test-Path -LiteralPath $partialPath) {
                Remove-Item -LiteralPath $partialPath -Force -ErrorAction SilentlyContinue
            }
            continue
        }
    }
    return $result
}
183+
160184
$useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $toDownload.Count -gt 1
161-
if ($useParallel) {
185+
$results = if ($useParallel) {
162186
$toDownload | ForEach-Object -Parallel {
163187
$link = $_.Link
164188
$destination = $_.Destination
@@ -212,5 +236,18 @@ function Save-OpenSpecDocument {
212236
& $downloadOne -link $item.Link -destination $item.Destination
213237
}
214238
}
239+
240+
# Retry failed DOCX via RSS fallback URLs (e.g. MS-THCH, MS-MQOD with stale Learn-page links)
241+
$downloadResults = New-Object System.Collections.Generic.List[object]
242+
$i = 0
243+
foreach ($r in @($results)) {
244+
$dest = $toDownload[$i].Destination
245+
$r = & $tryDocxFallback -result $r -destination $dest
246+
[void]$downloadResults.Add($r)
247+
$i++
248+
}
249+
250+
$existsResults
251+
$downloadResults
215252
}
216253
}

0 commit comments

Comments
 (0)