Skip to content

Commit 9885a8c

Browse files
RDP broken links: Strategy A/B/D + SourceLinkMetadata fix
- Strategy A: Build GuidToSection from InternalHyperlinks by matching link text to SectionToTitle (with wildcard escaping for [MS-XXX] patterns)
- Strategy B: Cross-paragraph bookmark association for section_<guid>
- Strategy D: Build GuidToGlossarySlug from InternalHyperlinks with gt_ anchors
- Fix SourceLinkMetadata access in Invoke-OpenSpecMarkdownCleanup for OrderedDictionary (use direct .GuidToSection instead of PSObject.Properties)
- Add Analyze-DocxLinkMetadata.ps1 for link metadata analysis
- Legal notice extraction, LEGAL.md template, Build-Publish updates
- Ignore publish/ in .gitignore

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 4b9a5de commit 9885a8c

7 files changed

Lines changed: 408 additions & 8 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
artifacts/
22
downloads*/
33
converted*/
4-
reports*/
4+
reports*/
5+
publish/

AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
110110
}
111111
$inGlossary = $false
112112
$glossaryHeadingLevel = 0
113+
$pendingSectionGuids = New-Object System.Collections.Generic.List[string]
113114

114115
# Resolve media output directory for image extraction.
115116
$resolvedMediaDir = $null
@@ -173,6 +174,14 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
173174

174175
$sectionAnchor = $anchorInfo.SectionAnchor
175176
if (-not [string]::IsNullOrWhiteSpace($sectionAnchor)) {
177+
# Strategy B: Resolve cross-paragraph section GUIDs from previous paragraph
178+
foreach ($g in $pendingSectionGuids) {
179+
if (-not $linkMetadata.GuidToSection.ContainsKey($g)) {
180+
$linkMetadata.GuidToSection[$g] = $sectionAnchor
181+
}
182+
}
183+
$pendingSectionGuids.Clear()
184+
176185
if (-not $linkMetadata.SectionToTitle.ContainsKey($sectionAnchor)) {
177186
$linkMetadata.SectionToTitle[$sectionAnchor] = $headingText
178187
}
@@ -215,6 +224,18 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
215224
}
216225
}
217226
}
227+
228+
# Strategy B: Paragraph has section_<guid> bookmarks but no SectionAnchor — defer to next paragraph
229+
if ([string]::IsNullOrWhiteSpace($anchorInfo.SectionAnchor)) {
230+
foreach ($bookmarkName in @($anchorInfo.BookmarkNames)) {
231+
if ($bookmarkName -match '(?i)^section_(?<guid>[a-f0-9]{32})$') {
232+
$g = $Matches['guid'].ToLowerInvariant()
233+
if (-not $linkMetadata.GuidToSection.ContainsKey($g) -and -not $pendingSectionGuids.Contains($g)) {
234+
[void]$pendingSectionGuids.Add($g)
235+
}
236+
}
237+
}
238+
}
218239
}
219240
elseif ($child.LocalName -eq 'tbl') {
220241
$tableLines = ConvertFrom-OpenSpecOpenXmlTable -TableNode $child -NamespaceManager $nsmgr -RelationshipMap $relationshipMap -Archive $archive -MediaOutputDirectory $resolvedMediaDir
@@ -231,6 +252,80 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
231252
throw 'OpenXml conversion produced empty markdown output.'
232253
}
233254

255+
# Strategy A: Build GuidToSection from InternalHyperlinks by matching link text to SectionToTitle
256+
if ($linkMetadata.SectionToTitle.Count -eq 0) {
257+
$headingRegex = [regex]::new('^(?<level>#{1,6})\s+(?<num>\d+(?:\.\d+)*)\s+(?<title>.+)$', [System.Text.RegularExpressions.RegexOptions]::Multiline)
258+
foreach ($m in $headingRegex.Matches($markdown)) {
259+
$sectionAnchor = "Section_$($m.Groups['num'].Value)"
260+
$fullTitle = "$($m.Groups['num'].Value) $($m.Groups['title'].Value.Trim())"
261+
if (-not $linkMetadata.SectionToTitle.ContainsKey($sectionAnchor)) {
262+
$linkMetadata.SectionToTitle[$sectionAnchor] = $fullTitle
263+
}
264+
}
265+
}
266+
$titleToSection = @{}
267+
foreach ($entry in $linkMetadata.SectionToTitle.GetEnumerator()) {
268+
$key = [string]$entry.Key
269+
$val = ([string]$entry.Value -replace '\s+', ' ').Trim()
270+
if (-not [string]::IsNullOrWhiteSpace($val)) {
271+
$titleToSection[$val] = $key
272+
$withoutNum = ($val -replace '^\d+(?:\.\d+)*\s+', '').Trim()
273+
if ($withoutNum -and -not $titleToSection.ContainsKey($withoutNum)) {
274+
$titleToSection[$withoutNum] = $key
275+
}
276+
}
277+
}
278+
$sectionGuidRegex = [regex]::new('^(?:[Ss]ection_)?([a-f0-9]{32})$')
279+
$internalLinksArray = $linkMetadata.InternalHyperlinks.ToArray()
280+
foreach ($link in $internalLinksArray) {
281+
$anchor = [string]$link.Anchor
282+
$text = ([string]$link.Text -replace '\s+', ' ').Trim()
283+
$m = $sectionGuidRegex.Match($anchor)
284+
if (-not $m.Success) { continue }
285+
$guid = $m.Groups[1].Value.ToLowerInvariant()
286+
if ($linkMetadata.GuidToSection.ContainsKey($guid)) { continue }
287+
$matchedSection = $null
288+
if ($titleToSection.ContainsKey($text)) {
289+
$matchedSection = $titleToSection[$text]
290+
}
291+
else {
292+
foreach ($tit in $titleToSection.Keys) {
293+
if ($tit -eq $text) { $matchedSection = $titleToSection[$tit]; break }
294+
$textEsc = [Management.Automation.WildcardPattern]::Escape($text)
295+
$titEsc = [Management.Automation.WildcardPattern]::Escape($tit)
296+
if ($tit -like "*$textEsc*" -and $text.Length -ge 8) { $matchedSection = $titleToSection[$tit]; break }
297+
if ($text -like "*$titEsc*" -and $tit.Length -ge 8) { $matchedSection = $titleToSection[$tit]; break }
298+
}
299+
}
300+
if ($matchedSection) {
301+
$linkMetadata.GuidToSection[$guid] = $matchedSection
302+
}
303+
}
304+
305+
# Strategy D: Build GuidToGlossarySlug from InternalHyperlinks with gt_<guid> anchors
306+
$termToSlug = @{}
307+
$glossaryDefRegex = [regex]::new('^\s*\*\*(?<term>[^*]+)\*\*\s*:\s*', [System.Text.RegularExpressions.RegexOptions]::Multiline)
308+
foreach ($gm in $glossaryDefRegex.Matches($markdown)) {
309+
$term = $gm.Groups['term'].Value.Trim()
310+
$slug = Get-OpenSpecGlossarySlugFromTerm -Term $term
311+
$termToSlug[$term] = $slug
312+
if ($term -match '^(.+?)\s+\(([^)]+)\)\s*$') {
313+
$termToSlug[$Matches[2].Trim()] = $slug
314+
}
315+
}
316+
$gtGuidRegex = [regex]::new('^gt_([a-f0-9\-]{36})$', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)
317+
foreach ($link in $internalLinksArray) {
318+
$anchor = [string]$link.Anchor
319+
$text = ([string]$link.Text -replace '\s+', ' ').Trim()
320+
$m = $gtGuidRegex.Match($anchor)
321+
if (-not $m.Success) { continue }
322+
$guid = $m.Groups[1].Value.ToLowerInvariant()
323+
if ($linkMetadata.GuidToGlossarySlug.ContainsKey($guid)) { continue }
324+
if ($termToSlug.ContainsKey($text)) {
325+
$linkMetadata.GuidToGlossarySlug[$guid] = $termToSlug[$text]
326+
}
327+
}
328+
234329
$linkMetadata.Stats.GuidSectionMapCount = $linkMetadata.GuidToSection.Count
235330
$linkMetadata.Stats.TocAliasCount = $linkMetadata.TocAlias.Count
236331
$linkMetadata.Stats.GlossaryGuidMapCount = $linkMetadata.GuidToGlossarySlug.Count

AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ function Invoke-OpenSpecMarkdownCleanup {
6868
$result = $tocResult.Markdown
6969
foreach ($issue in $tocResult.Issues) { [void]$issues.Add($issue) }
7070

71-
$sourceGuidToSection = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['GuidToSection']) { $SourceLinkMetadata.GuidToSection } else { $null }
71+
$sourceGuidToSection = if ($SourceLinkMetadata -and $SourceLinkMetadata.GuidToSection) { $SourceLinkMetadata.GuidToSection } else { $null }
7272
$guidResult = Resolve-OpenSpecGuidSectionAnchors -Markdown $result -GuidToSectionMap $sourceGuidToSection
7373
$result = $guidResult.Markdown
7474
foreach ($issue in $guidResult.Issues) { [void]$issues.Add($issue) }
@@ -110,7 +110,7 @@ function Invoke-OpenSpecMarkdownCleanup {
110110
})
111111
}
112112

113-
$sourceSectionToTitle = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['SectionToTitle']) { $SourceLinkMetadata.SectionToTitle } else { $null }
113+
$sourceSectionToTitle = if ($SourceLinkMetadata -and $null -ne $SourceLinkMetadata.SectionToTitle) { $SourceLinkMetadata.SectionToTitle } else { $null }
114114
$guidByHeadingResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $result -SectionToTitleMap $sourceSectionToTitle
115115
$result = $guidByHeadingResult.Markdown
116116
if ($guidByHeadingResult.LinksRepaired -gt 0) {
@@ -122,7 +122,7 @@ function Invoke-OpenSpecMarkdownCleanup {
122122
})
123123
}
124124

125-
$sourceGuidToGlossarySlug = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['GuidToGlossarySlug']) { $SourceLinkMetadata.GuidToGlossarySlug } else { $null }
125+
$sourceGuidToGlossarySlug = if ($SourceLinkMetadata -and $null -ne $SourceLinkMetadata.GuidToGlossarySlug) { $SourceLinkMetadata.GuidToGlossarySlug } else { $null }
126126
$glossaryResult = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $result -GuidToGlossarySlugMap $sourceGuidToGlossarySlug
127127
$result = $glossaryResult.Markdown
128128
if ($glossaryResult.AnchorsInjected -gt 0 -or $glossaryResult.LinksRepaired -gt 0) {
@@ -146,14 +146,21 @@ function Invoke-OpenSpecMarkdownCleanup {
146146
})
147147
}
148148

149+
$legalResult = Add-LegalNoticeLinkAfterToc -Markdown $result
150+
$result = $legalResult.Markdown
151+
149152
$newLine = [Environment]::NewLine
150153
$result = [regex]::Replace($result, "(`r?`n){3,}", "$newLine$newLine")
151154

152-
[pscustomobject]@{
155+
$out = [pscustomobject]@{
153156
PSTypeName = 'AwakeCoding.OpenSpecs.MarkdownCleanupResult'
154157
Markdown = $result
155158
Issues = $issues.ToArray()
156159
}
160+
if ($frontMatterResult.Removed -and $frontMatterResult.PSObject.Properties['ExtractedBoilerplate']) {
161+
Add-Member -InputObject $out -NotePropertyName 'ExtractedBoilerplate' -NotePropertyValue $frontMatterResult.ExtractedBoilerplate
162+
}
163+
$out
157164
}
158165

159166
function ConvertFrom-OpenSpecHtmlTables {
@@ -935,6 +942,50 @@ function ConvertTo-OpenSpecGitHubFriendlyToc {
935942
}
936943
}
937944

945+
function Add-LegalNoticeLinkAfterToc {
    <#
    .SYNOPSIS
    Inserts a one-line pointer to LEGAL.md after the table-of-contents block.

    .DESCRIPTION
    Locates the first '<a id="Section_<digit>' anchor in the markdown, then the
    last '</details>' closing tag that appears before that anchor, and inserts
    the legal-notice line as its own paragraph right after that tag.

    The markdown is returned unchanged when:
      - it already contains the legal-notice line (repeat runs do not stack copies),
      - no section anchor is found, or
      - no '</details>' tag precedes the first section anchor.

    .PARAMETER Markdown
    The markdown text to process.

    .OUTPUTS
    A pscustomobject with a single 'Markdown' property holding the resulting text.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$Markdown
    )

    $newLine = [Environment]::NewLine
    $legalLine = "For the legal notice and IP terms, see [LEGAL.md](../LEGAL.md)."

    # Running the cleanup a second time must not add another copy of the notice.
    if ($Markdown.Contains($legalLine)) {
        return [pscustomobject]@{ Markdown = $Markdown }
    }

    $ignoreCase = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase

    $firstSectionMatch = [regex]::Match($Markdown, '<a\s+id="Section_\d', $ignoreCase)
    if (-not $firstSectionMatch.Success) {
        return [pscustomobject]@{ Markdown = $Markdown }
    }
    $beforeContent = $Markdown.Substring(0, $firstSectionMatch.Index)

    # Only a </details> located before the first section anchor counts; take the last one.
    $detailsMatches = [regex]::Matches($beforeContent, '</details>', $ignoreCase)
    if ($detailsMatches.Count -eq 0) {
        return [pscustomobject]@{ Markdown = $Markdown }
    }
    $lastDetailsMatch = $detailsMatches[$detailsMatches.Count - 1]

    $insertAt = $lastDetailsMatch.Index + $lastDetailsMatch.Length

    # Consume the WHOLE run of newlines that follows </details>. A quantified
    # capture group such as '(\r?\n)+' only retains its last repetition, so the
    # full match value is used here instead of a group.
    $newlineRun = [regex]::Match($beforeContent.Substring($insertAt), '^(?:\r?\n)+').Value

    $head = $Markdown.Substring(0, $insertAt)
    $tail = $Markdown.Substring($insertAt + $newlineRun.Length)

    # Always surround the notice with blank lines so it never ends up on the
    # same line as </details>, even when the tag was not followed by a newline.
    [pscustomobject]@{
        Markdown = $head + $newLine + $newLine + $legalLine + $newLine + $newLine + $tail
    }
}
988+
938989
function ConvertTo-OpenSpecNormalizedEncodedBracketUrls {
939990
[CmdletBinding()]
940991
param(
@@ -1414,10 +1465,14 @@ function Remove-OpenSpecFrontMatterBoilerplate {
14141465
$removed = $true
14151466
}
14161467

1417-
[pscustomobject]@{
1468+
$out = [pscustomobject]@{
14181469
Markdown = $result
14191470
Removed = $removed
14201471
}
1472+
if ($removed -and $blockContent) {
1473+
Add-Member -InputObject $out -NotePropertyName 'ExtractedBoilerplate' -NotePropertyValue $blockContent
1474+
}
1475+
$out
14211476
}
14221477

14231478
function Add-OpenSpecSectionAnchors {

AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,19 @@ function Convert-OpenSpecToMarkdown {
191191

192192
$cleaned.Markdown | Set-Content -LiteralPath $markdownPath -Encoding UTF8
193193

194+
if ($protocolId -eq 'MS-DTYP' -and $cleaned.PSObject.Properties['ExtractedBoilerplate']) {
195+
$legalDir = Join-Path -Path $OutputPath -ChildPath '_legal'
196+
if (-not (Test-Path -LiteralPath $legalDir)) {
197+
[void](New-Item -Path $legalDir -ItemType Directory -Force)
198+
}
199+
$legalContent = $cleaned.ExtractedBoilerplate.Trim()
200+
if ($legalContent -and -not ($legalContent -match '^(#|\*\*[^*]+\*\*)')) {
201+
$legalContent = "# Intellectual Property Rights Notice for Open Specifications Documentation`n`n" + $legalContent
202+
}
203+
$legalPath = Join-Path -Path $legalDir -ChildPath 'LEGAL.md'
204+
$legalContent | Set-Content -LiteralPath $legalPath -Encoding UTF8
205+
}
206+
194207
$layoutModelPath = Join-Path -Path $artifactDirectory -ChildPath 'layout-model.json'
195208
$allIssues | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $layoutModelPath -Encoding UTF8
196209

0 commit comments

Comments
 (0)