Skip to content

Commit 85105bf

Browse files
Speed up workflow: parallel conversion, corrupt-DOCX resilience, repro and test scripts
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent cf6e4eb commit 85105bf

6 files changed

Lines changed: 176 additions & 6 deletions

File tree

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
# Convert all Windows protocol specs to markdown, build a clean publish tree,
# then force-push it to an orphaned 'publish' branch (e.g. for GitHub Pages).
# Conversion runs in parallel (PowerShell 7) to reduce run time.
name: Convert and publish

# Manual trigger only: the workflow is started from the Actions tab or the API.
on:
  workflow_dispatch:

jobs:
  convert-and-publish:
    runs-on: windows-latest
    # The last step pushes with GITHUB_TOKEN. Request write access to repository
    # contents explicitly so the push does not depend on the repository's default
    # token permissions (which may be read-only).
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install OpenXML module
        shell: pwsh
        run: |
          Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
          Install-Module -Name OpenXML -Force -Scope CurrentUser

      # Download every catalog entry as DOCX, keep only the entries that are
      # available on disk, and convert them with up to 4 parallel workers.
      - name: Import module and convert all specs
        shell: pwsh
        run: |
          Import-Module .\AwakeCoding.OpenSpecs -Force
          Get-OpenSpecCatalog |
            Save-OpenSpecDocument -Format DOCX -OutputPath ./downloads-convert -Force |
            Where-Object { $_.Status -in 'Downloaded', 'Exists' } |
            Convert-OpenSpecToMarkdown -OutputPath ./converted-specs -Force -Parallel -ThrottleLimit 4

      - name: Build publish directory and index
        shell: pwsh
        run: |
          Import-Module .\AwakeCoding.OpenSpecs -Force
          .\scripts\Prepare-Publish.ps1 -ConvertedSpecsPath ./converted-specs -PublishPath ./publish
          Update-OpenSpecIndex -Path ./publish

      - name: Zip publish contents
        shell: pwsh
        run: |
          Compress-Archive -Path ./publish/* -DestinationPath ./publish.zip -Force

      - name: Upload publish artifact
        uses: actions/upload-artifact@v4
        with:
          name: publish
          path: publish.zip

      # 'git init' inside ./publish creates a brand-new repository, so the single
      # commit made here has no parent; force-pushing it replaces whatever the
      # remote 'publish' branch previously contained.
      - name: Push to orphaned publish branch
        shell: pwsh
        working-directory: publish
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          $RemoteRepo = "https://${Env:GITHUB_ACTOR}:${Env:GITHUB_TOKEN}@github.com/${Env:GITHUB_REPOSITORY}.git"
          git init
          git config user.name "GitHub Actions"
          git config user.email "github-actions-bot@users.noreply.github.com"
          git add .
          git commit -m "Publish converted Open Specs markdown (${Env:GITHUB_REPOSITORY})"
          git push --force "${RemoteRepo}" "HEAD:publish"

AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ function Convert-OpenSpecToMarkdown {
1111
[ValidateSet('Auto', 'DOCX', 'PDF')]
1212
[string]$SourceFormat = 'Auto',
1313

14-
[switch]$Force
14+
[switch]$Force,
15+
16+
[switch]$Parallel,
17+
18+
[int]$ThrottleLimit = 4
1519
)
1620

1721
begin {
@@ -39,6 +43,19 @@ function Convert-OpenSpecToMarkdown {
3943
throw 'Provide InputObject or Path for conversion.'
4044
}
4145

46+
$useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $items.Count -gt 1
47+
if ($useParallel) {
48+
$moduleBase = (Get-Module AwakeCoding.OpenSpecs).ModuleBase
49+
$outputPathArg = $OutputPath
50+
$forceArg = $Force
51+
$sourceFormatArg = $SourceFormat
52+
$items | ForEach-Object -Parallel {
53+
Import-Module (Join-Path $using:moduleBase 'AwakeCoding.OpenSpecs.psd1') -Force | Out-Null
54+
Convert-OpenSpecToMarkdown -Path $_.Path -OutputPath $using:outputPathArg -Force:$using:forceArg -SourceFormat $using:sourceFormatArg
55+
} -ThrottleLimit $ThrottleLimit
56+
return
57+
}
58+
4259
$toolchain = Get-OpenSpecToolchain
4360

4461
foreach ($item in $items) {
@@ -109,6 +126,7 @@ function Convert-OpenSpecToMarkdown {
109126
}
110127

111128
$conversionStep = $null
129+
try {
112130
if ($resolvedFormat -eq 'DOCX') {
113131
$toolchain = Get-OpenSpecToolchain -RequireDocxConverter
114132
$rawMarkdownPath = Join-Path -Path $artifactDirectory -ChildPath 'raw-docx.md'
@@ -214,6 +232,26 @@ function Convert-OpenSpecToMarkdown {
214232
ErrorCount = $errorCount
215233
ReportPath = $reportPath
216234
}
235+
}
236+
catch {
237+
$errMsg = $_.Exception.Message
238+
Write-Warning "Conversion failed for $protocolId : $errMsg"
239+
[pscustomobject]@{
240+
PSTypeName = 'AwakeCoding.OpenSpecs.ConversionResult'
241+
ProtocolId = $protocolId
242+
SourcePath = $sourcePath
243+
SourceFormat = $resolvedFormat
244+
MarkdownPath = $null
245+
Status = 'Failed'
246+
Strategy = $null
247+
IssueCount = 0
248+
InfoCount = 0
249+
WarningCount = 0
250+
ErrorCount = 1
251+
ReportPath = $null
252+
Error = $errMsg
253+
}
254+
}
217255
}
218256
}
219257
}

AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@ function Invoke-OpenSpecConversionPipeline {
1212

1313
[string]$OutputPath = (Join-Path -Path (Get-Location) -ChildPath 'converted-specs'),
1414

15-
[switch]$Force
15+
[switch]$Force,
16+
17+
[switch]$Parallel,
18+
19+
[int]$ThrottleLimit = 4
1620
)
1721

1822
if (-not $ProtocolId -and -not $Query) {
@@ -26,7 +30,6 @@ function Invoke-OpenSpecConversionPipeline {
2630
Save-OpenSpecDocument -Query $Query -Format $Format -OutputPath $DownloadPath -Force:$Force
2731
}
2832

29-
$downloadResults |
30-
Where-Object { $_.Status -in 'Downloaded', 'Exists' } |
31-
Convert-OpenSpecToMarkdown -OutputPath $OutputPath -Force:$Force
33+
$toConvert = $downloadResults | Where-Object { $_.Status -in 'Downloaded', 'Exists' }
34+
$toConvert | Convert-OpenSpecToMarkdown -OutputPath $OutputPath -Force:$Force -Parallel:$Parallel -ThrottleLimit $ThrottleLimit
3235
}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ These folders are tracked with `.gitkeep`, while their contents are ignored via
3535
- `Save-OpenSpecDocument` - Downloads selected documents.
3636
- `Test-OpenSpecDownload` - End-to-end validation for a set of protocol IDs.
3737
- `Convert-OpenSpecToMarkdown` - Converts downloaded DOCX/PDF files to Markdown.
38-
- `Invoke-OpenSpecConversionPipeline` - Download + convert in one pipeline.
38+
- `Invoke-OpenSpecConversionPipeline` - Download + convert in one pipeline. Use `-Parallel -ThrottleLimit N` (PowerShell 7+) to run conversions in parallel and reduce CI time; on older PowerShell versions, or when only one document is selected, conversion runs sequentially.
3939
- `Get-OpenSpecConversionReport` - Reads conversion report artifacts.
4040
- `Test-OpenSpecMarkdownFidelity` - Runs lightweight fidelity checks on generated Markdown.
4141

scripts/Repro-CorruptDocx.ps1

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
# Reproduce corrupt DOCX failure locally: create a truncated file and run conversion.
# The CI failure was: "End of Central Directory record could not be found" when opening a DOCX (ZIP).
#
# Expected outcome: the truncated file is reported with Status 'Failed' while the
# intact file is reported with Status 'Converted'. The script exits with code 1
# when that expectation is not met, and always removes its temporary work
# directory, even when a step throws.
$ErrorActionPreference = 'Stop'
Import-Module (Join-Path $PSScriptRoot '..' 'AwakeCoding.OpenSpecs') -Force

# $env:TEMP is only defined on Windows; GetTempPath() resolves the temp directory
# on every platform PowerShell runs on.
$workDir = Join-Path ([System.IO.Path]::GetTempPath()) "openspecs-repro-$(Get-Date -Format 'yyyyMMddHHmmss')"
$downloadsDir = Join-Path $workDir 'downloads'
$convertedDir = Join-Path $workDir 'converted'
$reproSucceeded = $false

try {
    New-Item -ItemType Directory -Path $workDir -Force | Out-Null
    New-Item -ItemType Directory -Path $downloadsDir -Force | Out-Null
    New-Item -ItemType Directory -Path $convertedDir -Force | Out-Null

    Write-Host "Work dir: $workDir"

    # Download one real DOCX, then truncate a copy of it to simulate corruption
    Write-Host "Downloading one spec (MS-NVGREE) to get a valid DOCX..."
    $null = Get-OpenSpecCatalog | Where-Object { $_.ProtocolId -eq 'MS-NVGREE' } |
        Save-OpenSpecDocument -Format DOCX -OutputPath $downloadsDir -Force

    $goodDocx = Get-ChildItem -LiteralPath $downloadsDir -Filter '*.docx' | Select-Object -First 1
    if (-not $goodDocx) {
        # With ErrorActionPreference = 'Stop' a Write-Error already terminates the
        # script, so throw directly instead of following it with an unreachable exit.
        throw 'No DOCX downloaded'
    }

    # Create a corrupt copy (truncate so ZIP EOCD is missing)
    $corruptPath = Join-Path $downloadsDir 'CORRUPT-[MS-TEST].docx'
    $bytes = [System.IO.File]::ReadAllBytes($goodDocx.FullName)
    # Keep roughly the first 30% (at least 100 bytes), but never more than the file
    # actually contains, so the size reported below matches what is written.
    $truncatedSize = [Math]::Min($bytes.Length, [Math]::Max(100, [int]($bytes.Length * 0.3)))
    $truncated = [byte[]]::new($truncatedSize)
    [Array]::Copy($bytes, $truncated, $truncatedSize)
    [System.IO.File]::WriteAllBytes($corruptPath, $truncated)
    Write-Host "Created truncated (corrupt) file: $corruptPath ($truncatedSize bytes)"

    # Run conversion: should hit "End of Central Directory record could not be found"
    Write-Host "Converting (expect failure for corrupt file, then success for good file)..."
    $results = @(
        [pscustomobject]@{ Path = $corruptPath; ProtocolId = 'MS-TEST' },
        [pscustomobject]@{ Path = $goodDocx.FullName; ProtocolId = 'MS-NVGREE' }
    )
    $conversionResults = $results | Convert-OpenSpecToMarkdown -OutputPath $convertedDir -Force

    Write-Host "`nConversion results:"
    $conversionResults | Format-Table ProtocolId, Status, Error -AutoSize

    $failed = $conversionResults | Where-Object { $_.Status -eq 'Failed' }
    $converted = $conversionResults | Where-Object { $_.Status -eq 'Converted' }
    if ($failed -and $failed.ProtocolId -eq 'MS-TEST' -and $converted -and $converted.ProtocolId -eq 'MS-NVGREE') {
        Write-Host "`nRepro succeeded: corrupt file failed gracefully, good file converted."
        $reproSucceeded = $true
    } else {
        Write-Host "`nUnexpected: Failed=$($failed.ProtocolId) Converted=$($converted.ProtocolId)"
    }
}
finally {
    # Runs on success and on failure so the work directory is never left behind.
    Remove-Item -LiteralPath $workDir -Recurse -Force -ErrorAction SilentlyContinue
    Write-Host "`nDone. Cleaned up $workDir"
}

# Report an unexpected outcome to the caller through the exit code.
if (-not $reproSucceeded) {
    exit 1
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
# Quick test: parallel conversion with 2 specs
# Downloads the first two catalog entries as DOCX, converts them with
# -Parallel -ThrottleLimit 2, prints ProtocolId/Status for each result, and
# removes the temporary directory afterwards (also when a step throws).
$ErrorActionPreference = 'Stop'
Import-Module (Join-Path $PSScriptRoot '..' 'AwakeCoding.OpenSpecs') -Force

# $env:TEMP is only defined on Windows; GetTempPath() resolves the temp directory
# on every platform PowerShell runs on.
$d = Join-Path ([System.IO.Path]::GetTempPath()) 'openspecs-parallel-test'
$outDir = Join-Path $d 'out'

try {
    New-Item -ItemType Directory -Path $d -Force | Out-Null

    Get-OpenSpecCatalog | Select-Object -First 2 | Save-OpenSpecDocument -Format DOCX -OutputPath $d -Force |
        Where-Object { $_.Status -in 'Downloaded', 'Exists' } |
        Convert-OpenSpecToMarkdown -OutputPath $outDir -Force -Parallel -ThrottleLimit 2 |
        Format-Table ProtocolId, Status -AutoSize
}
finally {
    # Clean up even when the download or conversion fails.
    Remove-Item -LiteralPath $d -Recurse -Force -ErrorAction SilentlyContinue
}
Write-Host 'Done.'

0 commit comments

Comments
 (0)