@@ -263,18 +263,84 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
263263 }
264264 }
265265 }
266- $titleToSection = @ {}
# Builds a string sort key for a section identifier.
#   - Ids of the form 'Section_<n>[.<n>...]' yield '0|' followed by every numeric
#     component left-padded with zeros to 8 characters and joined with '.', so
#     that plain string ordering follows numeric ordering (Section_2 sorts
#     before Section_10).
#   - Any other id yields '1|' plus the lower-cased id, which places it after
#     every numbered section.
# Parameter: $sectionId - the section identifier to build a key for.
# Returns:   the sort key string.
$getSectionSortKey = {
    param([string]$sectionId)

    if ($sectionId -match '^Section_(?<num>\d+(?:\.\d+)*)$') {
        # The pattern above already guarantees that each component is a run of
        # digits, so the components are normalized as text (strip leading zeros,
        # then pad) instead of being cast to [int]; the cast raises a conversion
        # error for components larger than [int]::MaxValue.
        # Components longer than 8 digits are kept whole (PadLeft never truncates).
        $padded = @(
            foreach ($component in ($Matches['num'] -split '\.')) {
                $component.TrimStart('0').PadLeft(8, '0')
            }
        )
        return ('0|' + ($padded -join '.'))
    }

    return ('1|' + $sectionId.ToLowerInvariant())
}
279+
280+ $sectionEntries = New-Object System.Collections.Generic.List[object ]
267281 foreach ($entry in $linkMetadata.SectionToTitle.GetEnumerator ()) {
268- $key = [string ]$entry.Key
269- $val = ([string ]$entry.Value -replace ' \s+' , ' ' ).Trim()
270- if (-not [string ]::IsNullOrWhiteSpace($val )) {
271- $titleToSection [$val ] = $key
272- $withoutNum = ($val -replace ' ^\d+(?:\.\d+)*\s+' , ' ' ).Trim()
273- if ($withoutNum -and -not $titleToSection.ContainsKey ($withoutNum )) {
274- $titleToSection [$withoutNum ] = $key
282+ $sectionId = [string ]$entry.Key
283+ $title = ([string ]$entry.Value -replace ' \s+' , ' ' ).Trim()
284+ if ([string ]::IsNullOrWhiteSpace($sectionId ) -or [string ]::IsNullOrWhiteSpace($title )) {
285+ continue
286+ }
287+
288+ $titleWithoutNum = ($title -replace ' ^\d+(?:\.\d+)*\s+' , ' ' ).Trim()
289+ [void ]$sectionEntries.Add ([pscustomobject ]@ {
290+ SectionId = $sectionId
291+ TitleNormalized = $title
292+ TitleWithoutNumber = $titleWithoutNum
293+ SortKey = (& $getSectionSortKey $sectionId )
294+ })
295+ }
296+
297+ $orderedSectionEntries = @ ($sectionEntries | Sort-Object - Property @ (
298+ @ { Expression = { $_.SortKey } },
299+ @ { Expression = { $_.SectionId.ToLowerInvariant () } },
300+ @ { Expression = { $_.TitleNormalized.ToLowerInvariant () } }
301+ ))
302+
303+ $sectionIdSet = [System.Collections.Generic.HashSet [string ]]::new([System.StringComparer ]::OrdinalIgnoreCase)
304+ $titleToSections = @ {}
305+ $titleWithoutNumToSections = @ {}
306+
307+ foreach ($entry in $orderedSectionEntries ) {
308+ [void ]$sectionIdSet.Add ($entry.SectionId )
309+
310+ $titleKey = $entry.TitleNormalized.ToLowerInvariant ()
311+ if (-not $titleToSections.ContainsKey ($titleKey )) {
312+ $titleToSections [$titleKey ] = New-Object System.Collections.Generic.List[string ]
313+ }
314+ if (-not $titleToSections [$titleKey ].Contains($entry.SectionId )) {
315+ [void ]$titleToSections [$titleKey ].Add($entry.SectionId )
316+ }
317+
318+ if (-not [string ]::IsNullOrWhiteSpace($entry.TitleWithoutNumber )) {
319+ $withoutNumKey = $entry.TitleWithoutNumber.ToLowerInvariant ()
320+ if (-not $titleWithoutNumToSections.ContainsKey ($withoutNumKey )) {
321+ $titleWithoutNumToSections [$withoutNumKey ] = New-Object System.Collections.Generic.List[string ]
275322 }
323+ if (-not $titleWithoutNumToSections [$withoutNumKey ].Contains($entry.SectionId )) {
324+ [void ]$titleWithoutNumToSections [$withoutNumKey ].Add($entry.SectionId )
325+ }
326+ }
327+ }
328+
# Resolves a list of candidate section ids to a single id.
# Returns the id only when the list holds exactly one entry; a missing list, an
# empty list, or an ambiguous list (two or more ids) all yield $null.
# Parameter: $candidateSections - section ids collected for one lookup key.
$findUniqueSection = {
    param([System.Collections.Generic.List[string]]$candidateSections)

    $hasSingleCandidate = ($null -ne $candidateSections) -and ($candidateSections.Count -eq 1)
    if (-not $hasSingleCandidate) {
        return $null
    }

    return $candidateSections[0]
}
343+
278344 $sectionGuidRegex = [regex ]::new(' ^(?:[Ss]ection_)?([a-f0-9]{32})$' )
279345 $internalLinksArray = $linkMetadata.InternalHyperlinks.ToArray ()
280346 foreach ($link in $internalLinksArray ) {
@@ -285,18 +351,86 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
285351 $guid = $m.Groups [1 ].Value.ToLowerInvariant()
286352 if ($linkMetadata.GuidToSection.ContainsKey ($guid )) { continue }
287353 $matchedSection = $null
288- if ($titleToSection.ContainsKey ($text )) {
289- $matchedSection = $titleToSection [$text ]
354+
355+ if ($text -match ' ^(?:section\s+)?(?<num>\d+(?:\.\d+)*)$' ) {
356+ $directSection = " Section_$ ( $Matches [' num' ]) "
357+ if ($sectionIdSet.Contains ($directSection )) {
358+ $matchedSection = $directSection
359+ }
290360 }
291- else {
292- foreach ($tit in $titleToSection.Keys ) {
293- if ($tit -eq $text ) { $matchedSection = $titleToSection [$tit ]; break }
294- $textEsc = [Management.Automation.WildcardPattern ]::Escape($text )
295- $titEsc = [Management.Automation.WildcardPattern ]::Escape($tit )
296- if ($tit -like " *$textEsc *" -and $text.Length -ge 8 ) { $matchedSection = $titleToSection [$tit ]; break }
297- if ($text -like " *$titEsc *" -and $tit.Length -ge 8 ) { $matchedSection = $titleToSection [$tit ]; break }
361+
362+ $textKey = $text.ToLowerInvariant ()
363+ if (-not $matchedSection -and $titleToSections.ContainsKey ($textKey )) {
364+ $matchedSection = & $findUniqueSection $titleToSections [$textKey ]
365+ }
366+
367+ $textWithoutNum = ($text -replace ' ^\d+(?:\.\d+)*\s+' , ' ' ).Trim()
368+ if (-not $matchedSection -and -not [string ]::IsNullOrWhiteSpace($textWithoutNum )) {
369+ $textWithoutNumKey = $textWithoutNum.ToLowerInvariant ()
370+ if ($titleWithoutNumToSections.ContainsKey ($textWithoutNumKey )) {
371+ $matchedSection = & $findUniqueSection $titleWithoutNumToSections [$textWithoutNumKey ]
298372 }
299373 }
374+
375+ if (-not $matchedSection -and $text.Length -ge 8 ) {
376+ $fuzzyCandidates = New-Object System.Collections.Generic.List[object ]
377+ foreach ($entry in $orderedSectionEntries ) {
378+ $candidateTitle = $entry.TitleNormalized
379+ if ([string ]::IsNullOrWhiteSpace($candidateTitle ) -or $candidateTitle.Length -lt 8 ) {
380+ continue
381+ }
382+
383+ $containsText = $candidateTitle.IndexOf ($text , [System.StringComparison ]::OrdinalIgnoreCase) -ge 0
384+ $containsCandidate = $text.IndexOf ($candidateTitle , [System.StringComparison ]::OrdinalIgnoreCase) -ge 0
385+ if (-not $containsText -and -not $containsCandidate ) {
386+ continue
387+ }
388+
389+ $score = if ($containsText -and $containsCandidate ) {
390+ 0
391+ }
392+ elseif ($containsText ) {
393+ 1
394+ }
395+ else {
396+ 2
397+ }
398+
399+ [void ]$fuzzyCandidates.Add ([pscustomobject ]@ {
400+ Score = $score
401+ LengthDelta = [Math ]::Abs($candidateTitle.Length - $text.Length )
402+ SortKey = $entry.SortKey
403+ SectionId = $entry.SectionId
404+ })
405+ }
406+
407+ if ($fuzzyCandidates.Count -gt 0 ) {
408+ $orderedCandidates = @ ($fuzzyCandidates | Sort-Object - Property @ (
409+ @ { Expression = { $_.Score } },
410+ @ { Expression = { $_.LengthDelta } },
411+ @ { Expression = { $_.SortKey } },
412+ @ { Expression = { $_.SectionId } }
413+ ))
414+
415+ $best = $orderedCandidates [0 ]
416+ $isUniqueBest = $true
417+ if ($orderedCandidates.Count -gt 1 ) {
418+ $second = $orderedCandidates [1 ]
419+ if (
420+ $second.Score -eq $best.Score -and
421+ $second.LengthDelta -eq $best.LengthDelta -and
422+ $second.SortKey -eq $best.SortKey
423+ ) {
424+ $isUniqueBest = $false
425+ }
426+ }
427+
428+ if ($isUniqueBest ) {
429+ $matchedSection = $best.SectionId
430+ }
431+ }
432+ }
433+
300434 if ($matchedSection ) {
301435 $linkMetadata.GuidToSection [$guid ] = $matchedSection
302436 }
@@ -912,26 +1046,6 @@ function Get-OpenSpecOpenXmlParagraphAnchorInfo {
9121046 }
9131047}
9141048
915- function Get-OpenSpecOpenXmlParagraphAnchors {
916- [CmdletBinding ()]
917- param (
918- [Parameter (Mandatory )]
919- [System.Xml.XmlNode ]$ParagraphNode ,
920-
921- [Parameter (Mandatory )]
922- [System.Xml.XmlNamespaceManager ]$NamespaceManager ,
923-
924- [Parameter ()]
925- [string ]$ParagraphText ,
926-
927- [Parameter ()]
928- [string ]$HeadingStyle
929- )
930-
931- $info = Get-OpenSpecOpenXmlParagraphAnchorInfo - ParagraphNode $ParagraphNode - NamespaceManager $NamespaceManager - ParagraphText $ParagraphText - HeadingStyle $HeadingStyle
932- return @ ($info.Anchors )
933- }
934-
9351049function Get-OpenSpecOpenXmlParagraphInternalHyperlinks {
9361050 [CmdletBinding ()]
9371051 param (
0 commit comments