Skip to content

Commit 9e589b1

Browse files
committed
Make section-link repair deterministic and add regression tests
1 parent 6ed3375 commit 9e589b1

2 files changed

Lines changed: 165 additions & 32 deletions

File tree

AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1

Lines changed: 136 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,9 +1664,62 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch {
16641664

16651665
$newLine = [Environment]::NewLine
16661666
$lineArray = $Markdown -split '\r?\n'
1667-
$titleToSection = @{}
1667+
$titleToSections = @{}
1668+
$sectionOrder = @{}
16681669
$anchorIdRegex = [regex]::new('<a\s+id="([^"]+)"\s*></a>', 'IgnoreCase')
16691670

1671+
# Helper: remember the document-order rank of a section id in $sectionOrder.
#   $sectionId - section id such as "Section_2.2.1.3"; blank values are ignored.
#   $index     - rank to store for that id.
# Only the first rank seen for an id is kept; later calls for the same id are no-ops.
$addSectionOrder = {
1672+
param([string]$sectionId, [int]$index)
1673+
1674+
if ([string]::IsNullOrWhiteSpace($sectionId)) {
1675+
return
1676+
}
1677+
1678+
# First occurrence wins, so an id keeps the earliest position it was seen at.
if (-not $sectionOrder.ContainsKey($sectionId)) {
1679+
$sectionOrder[$sectionId] = $index
1680+
}
1681+
}
1682+
1683+
# Helper: register $sectionId as a candidate target for $title in $titleToSections.
# The title is normalized (asterisks removed, whitespace runs collapsed to one space,
# trimmed, lower-cased invariantly) and used as the key. Each key holds a list of
# section ids, so several sections sharing a title are all retained instead of the
# first one silently winning.
$addTitleMapping = {
1684+
param([string]$title, [string]$sectionId)
1685+
1686+
if ([string]::IsNullOrWhiteSpace($title) -or [string]::IsNullOrWhiteSpace($sectionId)) {
1687+
return
1688+
}
1689+
1690+
# Strip markdown emphasis markers and collapse whitespace before keying.
$norm = ($title -replace '\*+', '' -replace '\s+', ' ').Trim()
1691+
# A title made only of asterisks/whitespace normalizes to nothing; skip it.
if ([string]::IsNullOrWhiteSpace($norm)) {
1692+
return
1693+
}
1694+
1695+
# Keys are lower-cased so lookups do not depend on the casing of the link text.
$key = $norm.ToLowerInvariant()
1696+
if (-not $titleToSections.ContainsKey($key)) {
1697+
$titleToSections[$key] = [System.Collections.Generic.List[string]]::new()
1698+
}
1699+
1700+
# Append in first-seen order, without duplicates.
$bucket = $titleToSections[$key]
1701+
if (-not $bucket.Contains($sectionId)) {
1702+
[void]$bucket.Add($sectionId)
1703+
}
1704+
}
1705+
1706+
# Capture document-order ranking for section anchors and numbered headings.
1707+
# Single pass over the lines: every standalone Section_<number> anchor line and every
# numbered markdown heading gets the next rank. $addSectionOrder keeps only the first
# rank per id, so a heading that follows its own anchor does not change that id's rank,
# although it still consumes a value of $orderIndex (ranks may therefore have gaps).
$orderIndex = 0
1708+
for ($i = 0; $i -lt $lineArray.Count; $i++) {
1709+
$line = $lineArray[$i]
1710+
1711+
# Line consisting solely of <a id="Section_N.N.N"></a> (GUID-style ids do not match).
if ($line -match '^\s*<a\s+id="(Section_\d+(?:\.\d+)*)"\s*></a>\s*$') {
1712+
& $addSectionOrder $Matches[1] $orderIndex
1713+
$orderIndex++
1714+
continue
1715+
}
1716+
1717+
# Markdown heading that starts with a dotted section number, e.g. "## 2.2.1.3 Title".
if ($line -match '^\s*#{1,6}\s+(\d+(?:\.\d+)*)\s+.+$') {
1718+
& $addSectionOrder "Section_$($Matches[1])" $orderIndex
1719+
$orderIndex++
1720+
}
1721+
}
1722+
16701723
# Collect all existing anchors
16711724
$existingAnchors = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
16721725
foreach ($m in $anchorIdRegex.Matches($Markdown)) {
@@ -1678,11 +1731,10 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch {
16781731
$sectionId = [string]$entry.Key
16791732
$title = [string]$entry.Value
16801733
if ([string]::IsNullOrWhiteSpace($sectionId) -or [string]::IsNullOrWhiteSpace($title)) { continue }
1681-
$norm = ($title -replace '\s+', ' ').Trim()
1682-
if (-not $titleToSection.ContainsKey($norm)) { $titleToSection[$norm] = $sectionId }
1734+
1735+
& $addTitleMapping $title $sectionId
16831736
$withoutNum = $title -replace '^\d+(?:\.\d+)*\s+', ''
1684-
$normWithout = ($withoutNum -replace '\s+', ' ').Trim()
1685-
if ($normWithout -and -not $titleToSection.ContainsKey($normWithout)) { $titleToSection[$normWithout] = $sectionId }
1737+
& $addTitleMapping $withoutNum $sectionId
16861738
}
16871739
}
16881740

@@ -1694,14 +1746,11 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch {
16941746
$nextLine = if ($i + 1 -lt $lineArray.Count) { $lineArray[$i + 1].Trim() } else { '' }
16951747
if ([string]::IsNullOrWhiteSpace($nextLine)) { continue }
16961748
$title = if ($nextLine -match '^\s*#{1,6}\s+(?<title>.+)$') { $Matches['title'].Trim() } else { $nextLine }
1697-
$norm = ($title -replace '\s+', ' ').Trim()
1698-
if (-not $titleToSection.ContainsKey($norm)) { $titleToSection[$norm] = $sectionId }
1749+
& $addTitleMapping $title $sectionId
16991750
$withoutNum = $title -replace '^\d+(?:\.\d+)*\s+', ''
1700-
$normWithout = ($withoutNum -replace '\s+', ' ').Trim()
1701-
if ($normWithout -and -not $titleToSection.ContainsKey($normWithout)) { $titleToSection[$normWithout] = $sectionId }
1751+
& $addTitleMapping $withoutNum $sectionId
17021752
$withoutParen = $title -replace '\s*\([^)]*\)\s*$', '' # "Share Control Header (TS_SHARECONTROLHEADER)" -> "Share Control Header"
1703-
$normNoParen = ($withoutParen -replace '\s+', ' ').Trim()
1704-
if ($normNoParen -and -not $titleToSection.ContainsKey($normNoParen)) { $titleToSection[$normNoParen] = $sectionId }
1753+
& $addTitleMapping $withoutParen $sectionId
17051754
}
17061755
}
17071756

@@ -1714,52 +1763,107 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch {
17141763
$sectionNum = $hm.Groups[1].Value
17151764
$sectionId = "Section_$sectionNum"
17161765
$title = $hm.Groups['title'].Value.Trim()
1717-
$norm = ($title -replace '\s+', ' ').Trim()
1718-
if (-not $titleToSection.ContainsKey($norm)) { $titleToSection[$norm] = $sectionId }
1766+
& $addTitleMapping $title $sectionId
17191767
$withoutNum = ($title -replace '^\d+(?:\.\d+)*\s+', '') -replace '\s*\([^)]*\)\s*$', ''
1720-
$normWithout = ($withoutNum -replace '\s+', ' ').Trim()
1721-
if ($normWithout -and -not $titleToSection.ContainsKey($normWithout)) { $titleToSection[$normWithout] = $sectionId }
1768+
& $addTitleMapping $withoutNum $sectionId
17221769
}
17231770
}
17241771

1725-
# Find best section for link text: exact match, prefix match, or extract "(section N.N.N)" from link text.
1772+
# Find best section for link text with deterministic tie-breaking.
17261773
# Helper: pick the section id a link text should point to, or $null when nothing fits.
# This diff view interleaves the previous implementation (gutter marker ending in '-')
# with the new one (marker ending in '+'); the added comments describe the '+' lines.
# Resolution order in the new version:
#   1. link text that is only a section number (optionally prefixed by "section");
#   2. link text containing "(section N.N.N)";
#   3. exact match on the normalized, lower-cased title;
#   4. prefix relation between link text and a known title, ranked by score.
# Steps 1 and 2 only return an id that is present in $existingAnchors.
$findSectionForLinkText = {
1727-
param($norm, $titleToSection, $existingAnchors)
1728-
if ($titleToSection.ContainsKey($norm)) { return $titleToSection[$norm] }
1729-
# Extract section number from link text like "Share Control Header (section 2.2.8.1.1.1)"
1730-
if ($norm -match '\(section\s+(\d+(?:\.\d+)*)\)') {
1731-
$extractedId = "Section_$($Matches[1])"
1774+
param($norm, $titleToSections, $existingAnchors, $sectionOrder)
1775+
1776+
if ([string]::IsNullOrWhiteSpace($norm)) {
1777+
return $null
1778+
}
1779+
1780+
# Same normalization as the title keys: drop asterisks, collapse whitespace, trim.
$normalized = ($norm -replace '\*+', '' -replace '\s+', ' ').Trim()
1781+
if ([string]::IsNullOrWhiteSpace($normalized)) {
1782+
return $null
1783+
}
1784+
1785+
# Prefer explicit section-number references in link text.
1786+
if ($normalized -match '^(?:section\s+)?(?<num>\d+(?:\.\d+)*)$') {
1787+
$directId = "Section_$($Matches['num'])"
1788+
if ($existingAnchors.Contains($directId)) { return $directId }
1789+
}
1790+
1791+
if ($normalized -match '\(section\s+(?<num>\d+(?:\.\d+)*)\)') {
1792+
$extractedId = "Section_$($Matches['num'])"
17321793
if ($existingAnchors.Contains($extractedId)) { return $extractedId }
17331794
}
1795+
1796+
$normKey = $normalized.ToLowerInvariant()
1797+
# Exact title hit: when several sections share the title, take the one ranked earliest
# in $sectionOrder (unranked ids sort last), then the shorter id, then the id itself.
# NOTE(review): Sort-Object compares strings with culture-aware rules by default, so
# the final id tie-break may vary by culture - confirm whether that matters here.
if ($titleToSections.ContainsKey($normKey)) {
1798+
$exactSections = @($titleToSections[$normKey] | Sort-Object -Property @(
1799+
@{ Expression = { if ($sectionOrder.ContainsKey($_)) { [int]$sectionOrder[$_] } else { [int]::MaxValue } } },
1800+
@{ Expression = { $_.Length } },
1801+
@{ Expression = { $_ } }
1802+
))
1803+
if ($exactSections.Count -gt 0) {
1804+
return $exactSections[0]
1805+
}
1806+
}
1807+
17341808
$candidates = @()
1735-
foreach ($key in $titleToSection.Keys) {
1736-
if ($key -eq $norm) { return $titleToSection[$key] }
1737-
if ($key.StartsWith($norm + ' ') -or $key.StartsWith($norm + '(')) { $candidates += [pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] } }
1738-
elseif ($norm.StartsWith($key + ' ') -or $norm.StartsWith($key + '(')) { $candidates += [pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] } }
1739-
elseif ($key.StartsWith($norm) -or $norm.StartsWith($key)) { $candidates += [pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] } }
1809+
# Keys are visited in a sorted order (length, then text) rather than hashtable order.
# Score 1: the title extends the link text at a space or '(' boundary.
# Score 2: the link text extends the title at a space or '(' boundary.
# Score 3: either string is a raw prefix of the other, with no boundary check.
# NOTE(review): String.StartsWith(string) is a culture-sensitive comparison in .NET;
# consider passing [StringComparison]::Ordinal if culture-independent results are wanted.
foreach ($key in @($titleToSections.Keys | Sort-Object -Property @{ Expression = { $_.Length } }, @{ Expression = { $_ } })) {
1810+
$score = $null
1811+
if ($key.StartsWith($normKey + ' ') -or $key.StartsWith($normKey + '(')) {
1812+
$score = 1
1813+
}
1814+
elseif ($normKey.StartsWith($key + ' ') -or $normKey.StartsWith($key + '(')) {
1815+
$score = 2
1816+
}
1817+
elseif ($key.StartsWith($normKey) -or $normKey.StartsWith($key)) {
1818+
$score = 3
1819+
}
1820+
1821+
if ($null -eq $score) {
1822+
continue
1823+
}
1824+
1825+
# One candidate per (title key, section id) pair.
foreach ($sectionId in $titleToSections[$key]) {
1826+
$candidates += [pscustomobject]@{
1827+
Score = $score
1828+
Key = $key
1829+
KeyLength = $key.Length
1830+
SectionId = $sectionId
1831+
}
1832+
}
17401833
}
1834+
17411835
if ($candidates.Count -eq 1) { return $candidates[0].SectionId }
17421836
if ($candidates.Count -gt 1) {
1743-
# Prefer shortest key (most specific match), e.g. "Status Info PDU" over "Status Info PDU Data (TS_...)"
1744-
$best = $candidates | Sort-Object -Property { $_.Key.Length } | Select-Object -First 1
1837+
# Tie-break chain, most significant first: lowest score, key length closest to the
# link text length, shorter key, earliest document order, shorter id, id, key.
$best = $candidates |
1838+
Sort-Object -Property @(
1839+
@{ Expression = { $_.Score } },
1840+
@{ Expression = { [Math]::Abs($_.KeyLength - $normKey.Length) } },
1841+
@{ Expression = { $_.KeyLength } },
1842+
@{ Expression = { if ($sectionOrder.ContainsKey($_.SectionId)) { [int]$sectionOrder[$_.SectionId] } else { [int]::MaxValue } } },
1843+
@{ Expression = { $_.SectionId.Length } },
1844+
@{ Expression = { $_.SectionId } },
1845+
@{ Expression = { $_.Key } }
1846+
) |
1847+
Select-Object -First 1
17451848
return $best.SectionId
17461849
}
1850+
17471851
return $null
17481852
}
17491853
$guidLinkRegex = [regex]::new('\[(?<text>[^\]]+)\]\(#Section_[a-fA-F0-9]{32}\)')
17501854
$result = $guidLinkRegex.Replace($Markdown, {
17511855
param($m)
17521856
$rawText = $m.Groups['text'].Value
17531857
$norm = ($rawText -replace '\*+', '' -replace '\s+', ' ').Trim()
1754-
$sectionId = & $findSectionForLinkText $norm $titleToSection $existingAnchors
1755-
if (-not $sectionId -and $rawText.Trim() -ne $norm) { $sectionId = & $findSectionForLinkText $rawText.Trim() $titleToSection $existingAnchors }
1858+
$sectionId = & $findSectionForLinkText $norm $titleToSections $existingAnchors $sectionOrder
1859+
if (-not $sectionId -and $rawText.Trim() -ne $norm) { $sectionId = & $findSectionForLinkText $rawText.Trim() $titleToSections $existingAnchors $sectionOrder }
17561860
if ($sectionId) { "[$rawText](#$sectionId)" } else { $m.Value }
17571861
})
17581862
$linksRepaired = 0
17591863
foreach ($m in $guidLinkRegex.Matches($Markdown)) {
17601864
$norm = ($m.Groups['text'].Value -replace '\*+', '' -replace '\s+', ' ').Trim()
1761-
$sid = & $findSectionForLinkText $norm $titleToSection $existingAnchors
1762-
if (-not $sid) { $sid = & $findSectionForLinkText $m.Groups['text'].Value.Trim() $titleToSection $existingAnchors }
1865+
$sid = & $findSectionForLinkText $norm $titleToSections $existingAnchors $sectionOrder
1866+
if (-not $sid) { $sid = & $findSectionForLinkText $m.Groups['text'].Value.Trim() $titleToSections $existingAnchors $sectionOrder }
17631867
if ($sid) { $linksRepaired++ }
17641868
}
17651869

tests/AwakeCoding.OpenSpecs.Tests.ps1

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,32 @@ Describe 'Conversion report aggregation' {
7575
Remove-Item -LiteralPath $tempRoot -Recurse -Force
7676
}
7777
}
78+
79+
# Regression test for Repair-OpenSpecSectionGuidLinksByHeadingMatch: repeated runs over
# the same input must produce identical markdown, and GUID-style links must resolve to
# the expected numbered sections.
Describe 'Section GUID link repair' {
80+
It 'is deterministic and prefers explicit section-number targets' {
81+
$markdown = @'
82+
<a id="Section_2.2.1.3"></a>
83+
## 2.2.1.3 MCS Connect Initial PDU
84+
<a id="Section_3.2.5.3.3"></a>
85+
## 3.2.5.3.3 MCS Connect Initial PDU
86+
Numeric reference: [2.2.1.3](#Section_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa)
87+
Heading reference: [MCS Connect Initial PDU](#Section_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb)
88+
'@
89+
90+
# The fixture above has two sections with the same title plus two GUID-style links:
# one whose text is a bare section number and one whose text is the shared title.
# The function is invoked through the module object; presumably it is not exported
# (it lives under Private/) - confirm.
$module = Get-Module AwakeCoding.OpenSpecs -ErrorAction Stop
91+
# Run the repair five times on identical input to expose any run-to-run variation.
$results = 1..5 | ForEach-Object {
92+
& $module {
93+
param([string]$text)
94+
Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $text
95+
} $markdown
96+
}
97+
98+
$firstMarkdown = $results[0].Markdown
99+
foreach ($item in $results) {
100+
$item.Markdown | Should -Be $firstMarkdown
101+
}
102+
103+
# The numeric link must map to its own section; the ambiguous title link must map to
# Section_2.2.1.3, the first of the two same-titled sections in the fixture.
$firstMarkdown | Should -Match '\[2\.2\.1\.3\]\(#Section_2\.2\.1\.3\)'
104+
$firstMarkdown | Should -Match '\[MCS Connect Initial PDU\]\(#Section_2\.2\.1\.3\)'
105+
}
106+
}

0 commit comments

Comments
 (0)