Skip to content

Commit ce8a2d6

Browse files
github-actions[bot], Copilot, dsyme
authored
[Repo Assist] Fix incorrect column ranges for inline spans in Markdown parser (#1016)
* Fix incorrect column ranges for inline spans in Markdown parser (#744)

  The column positions tracked by parseChars were not advanced after non-literal inline spans (links, images, inline code), causing all subsequent spans on the same line to report incorrect StartColumn values.

  Root cause: after emitting a DirectLink, IndirectLink, InlineCode or AutoLink span, the ctx passed to the recursive parseChars call still had StartColumn pointing at the start of the just-emitted span rather than past it.

  Fix: introduce advanceCtxBy/spanRange helpers and use them to:
  - Advance ctx.StartColumn by the consumed character count after each span
  - Compute correct EndColumn for each span (StartColumn + consumed length)
  - Pass correct body context (StartColumn+1) when parsing link text

  Covers: DirectLink, IndirectLink, AutoLink, inline <url> links, InlineCode (both backtick forms), DirectImage, IndirectImage.

  Adds three regression tests from issue #744.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* ci: trigger CI checks

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: Don Syme <dsyme@users.noreply.github.com>
1 parent af8587a commit ce8a2d6

3 files changed

Lines changed: 178 additions & 19 deletions

File tree

RELEASE_NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* HTML-encode XML doc text nodes and unresolved `<see cref>` values to prevent HTML injection and fix broken output when docs contain characters like `<`, `>`, or backticks in generic type notation. [#748](https://github.com/fsprojects/FSharp.Formatting/issues/748)
1212
* Add uppercase output kind extension (e.g. `HTML`, `IPYNB`) to `ConditionalDefines` so that `#if HTML` and `(*** condition: HTML ***)` work alongside their lowercase variants. [#693](https://github.com/fsprojects/FSharp.Formatting/issues/693)
1313
* Strip `#if SYMBOL` / `#endif // SYMBOL` marker lines from `LiterateCode` source before syntax-highlighting so they do not appear in formatted output. [#693](https://github.com/fsprojects/FSharp.Formatting/issues/693)
14+
* Fix incorrect column ranges for inline spans (links, images, inline code) in the Markdown parser — spans and subsequent literals now report correct `StartColumn`/`EndColumn` values. [#744](https://github.com/fsprojects/FSharp.Formatting/issues/744)
1415
* Normalize `--projects` paths to absolute paths before passing to the project cracker, fixing failures when relative paths are supplied. [#793](https://github.com/fsprojects/FSharp.Formatting/issues/793)
1516

1617
### Changed

src/FSharp.Formatting.Markdown/MarkdownParser.fs

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,23 @@ type ParsingContext =
324324

325325
member x.AllowYamlFrontMatter = (x.ParseOptions &&& MarkdownParseOptions.AllowYamlFrontMatter) <> enum 0
326326

327+
/// Returns a copy of `ctx` in which the start column of the current range has been
/// moved `n` columns to the right. When `ctx.CurrentRange` is `None` it stays `None`;
/// no other field of the context is touched.
let private advanceCtxBy n ctx =
    { ctx with
        CurrentRange =
            ctx.CurrentRange
            |> Option.map (fun range -> { range with StartColumn = range.StartColumn + n }) }
337+
338+
/// Produces the range of a span that begins at the start column of `ctx.CurrentRange`
/// and is `n` characters wide: the current range is copied with its `EndColumn`
/// replaced by `StartColumn + n`. Yields `None` when the context has no current range.
let private spanRange n ctx =
    ctx.CurrentRange
    |> Option.map (fun range -> { range with EndColumn = range.StartColumn + n })
343+
327344
/// Parses a body of a paragraph and recognizes all inline tags.
328345
let rec parseChars acc input (ctx: ParsingContext) =
329346
seq {
@@ -390,11 +407,11 @@ let rec parseChars acc input (ctx: ParsingContext) =
390407
Some
391408
{ n with
392409
StartColumn = n.StartColumn + s
393-
EndColumn = n.EndColumn - e }
410+
EndColumn = n.StartColumn + s + body.Length }
394411
| None -> None
395412

396413
yield InlineCode(String(Array.ofList body).Trim(), rng)
397-
yield! parseChars [] rest ctx
414+
yield! parseChars [] rest (advanceCtxBy (s + body.Length + e) ctx)
398415

399416
// Display Latex inline math mode
400417
| DelimitedLatexDisplayMath [ '$'; '$' ] (body, rest) ->
@@ -442,60 +459,70 @@ let rec parseChars acc input (ctx: ParsingContext) =
442459
->
443460
let (value, ctx) = accLiterals.Value
444461
yield! value
445-
yield DirectLink([ Literal(link, ctx.CurrentRange) ], link, None, ctx.CurrentRange)
446-
yield! parseChars [] rest ctx
462+
let consumed = 1 + link.Length + 1
463+
yield DirectLink([ Literal(link, spanRange consumed ctx) ], link, None, spanRange consumed ctx)
464+
yield! parseChars [] rest (advanceCtxBy consumed ctx)
447465
// Not an inline link - leave as an inline HTML tag
448466
| List.DelimitedWith [ '<' ] [ '>' ] (tag, rest, _s, _e) ->
449467
yield! parseChars ('>' :: (List.rev tag) @ '<' :: acc) rest ctx
450468

451469
// Recognize direct link [foo](http://bar) or indirect link [foo][bar] or auto link http://bar
452-
| DirectLink(body, link, rest) ->
470+
| DirectLink(body, linkChars, rest) ->
453471
let (value, ctx) = accLiterals.Value
454472
yield! value
455473

456-
let link, title = getLinkAndTitle (String(Array.ofList link), MarkdownRange.zero)
474+
let consumed = 2 + body.Length + 2 + linkChars.Length
475+
let link, title = getLinkAndTitle (String(Array.ofList linkChars), MarkdownRange.zero)
476+
let bodyCtx = advanceCtxBy 1 ctx // advance past opening '['
457477

458-
yield DirectLink(parseChars [] body ctx |> List.ofSeq, link, title, ctx.CurrentRange)
459-
yield! parseChars [] rest ctx
478+
yield DirectLink(parseChars [] body bodyCtx |> List.ofSeq, link, title, spanRange consumed ctx)
479+
yield! parseChars [] rest (advanceCtxBy consumed ctx)
460480
| IndirectLink(body, link, original, rest) ->
461481
let (value, ctx) = accLiterals.Value
462482
yield! value
463483

484+
let consumed = 2 + body.Length + original.Length
485+
let bodyCtx = advanceCtxBy 1 ctx // advance past opening '['
486+
464487
let key =
465488
if String.IsNullOrEmpty(link) then
466489
String(body |> Array.ofSeq)
467490
else
468491
link
469492

470-
yield IndirectLink(parseChars [] body ctx |> List.ofSeq, original, key, ctx.CurrentRange)
471-
yield! parseChars [] rest ctx
493+
yield IndirectLink(parseChars [] body bodyCtx |> List.ofSeq, original, key, spanRange consumed ctx)
494+
yield! parseChars [] rest (advanceCtxBy consumed ctx)
472495
| AutoLink(link, rest) ->
473496
let (value, ctx) = accLiterals.Value
474497
yield! value
475-
yield DirectLink([ Literal(link, ctx.CurrentRange) ], link, None, ctx.CurrentRange)
476-
yield! parseChars [] rest ctx
498+
let consumed = link.Length
499+
yield DirectLink([ Literal(link, spanRange consumed ctx) ], link, None, spanRange consumed ctx)
500+
yield! parseChars [] rest (advanceCtxBy consumed ctx)
477501

478502
// Recognize image - this is a link prefixed with the '!' symbol
479-
| '!' :: DirectLink(body, link, rest) ->
503+
| '!' :: DirectLink(body, linkChars, rest) ->
480504
let (value, ctx) = accLiterals.Value
481505
yield! value
482506

483-
let link, title = getLinkAndTitle (String(Array.ofList link), MarkdownRange.zero)
507+
let consumed = 1 + 2 + body.Length + 2 + linkChars.Length
508+
let link, title = getLinkAndTitle (String(Array.ofList linkChars), MarkdownRange.zero)
484509

485-
yield DirectImage(String(Array.ofList body), link, title, ctx.CurrentRange)
486-
yield! parseChars [] rest ctx
510+
yield DirectImage(String(Array.ofList body), link, title, spanRange consumed ctx)
511+
yield! parseChars [] rest (advanceCtxBy consumed ctx)
487512
| '!' :: IndirectLink(body, link, original, rest) ->
488513
let (value, ctx) = accLiterals.Value
489514
yield! value
490515

516+
let consumed = 1 + 2 + body.Length + original.Length
517+
491518
let key =
492519
if String.IsNullOrEmpty(link) then
493520
String(body |> Array.ofSeq)
494521
else
495522
link
496523

497-
yield IndirectImage(String(Array.ofList body), original, key, ctx.CurrentRange)
498-
yield! parseChars [] rest ctx
524+
yield IndirectImage(String(Array.ofList body), original, key, spanRange consumed ctx)
525+
yield! parseChars [] rest (advanceCtxBy consumed ctx)
499526

500527
// Handle Emphasis
501528
| CannotOpenEmphasis(revPre, post) -> yield! parseChars (revPre @ acc) post ctx

tests/FSharp.Markdown.Tests/Markdown.fs

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1129,8 +1129,139 @@ let ``Replace relative markdown file in custom attribute`` () =
11291129

11301130
Markdown.ToHtml(doc, mdlinkResolver = mdlinkResolver) |> shouldEqual actual
11311131

1132+
// --------------------------------------------------------------------------------------
1133+
// Span range correctness tests (Issue #744)
1134+
// These tests verify that column positions are tracked correctly across span types.
1135+
// --------------------------------------------------------------------------------------
1136+
11321137
[<Test>]
1133-
let ``Don't replace links in generated code block`` () =
1138+
let ``Indirect link and subsequent literal have correct column ranges`` () =
    // Column ruler for the input text:
    //           1         2
    // 0123456789012345678901234567
    // Before [indirectLink] After

    // Every expected range in this test sits on line 1; only the columns differ.
    let onLineOne startColumn endColumn =
        Some
            { StartLine = 1
              StartColumn = startColumn
              EndLine = 1
              EndColumn = endColumn }

    let expected =
        [ Paragraph(
              [ Literal("Before ", onLineOne 0 7)
                // The link text range excludes the brackets; the link range includes them.
                IndirectLink([ Literal("indirectLink", onLineOne 8 20) ], "", "indirectLink", onLineOne 7 21)
                Literal(" After", onLineOne 21 27) ],
              onLineOne 0 27
          ) ]

    let parsed = "Before [indirectLink] After" |> Markdown.Parse

    parsed.Paragraphs |> shouldEqual expected
1190+
1191+
[<Test>]
1192+
let ``Direct link and subsequent literal have correct column ranges`` () =
    // Column ruler for the input text:
    //           1         2         3
    // 0123456789012345678901234567890123
    // Before [link](http://x.com) After

    // Every expected range in this test sits on line 1; only the columns differ.
    let onLineOne startColumn endColumn =
        Some
            { StartLine = 1
              StartColumn = startColumn
              EndLine = 1
              EndColumn = endColumn }

    let parsed = "Before [link](http://x.com) After" |> Markdown.Parse

    match parsed.Paragraphs with
    | [ Paragraph([ Literal("Before ", beforeRange); DirectLink(_, "http://x.com", _, linkRange); Literal(" After", afterRange) ],
                  _) ] ->
        beforeRange |> shouldEqual (onLineOne 0 7)
        // The link range spans the whole "[link](http://x.com)" text.
        linkRange |> shouldEqual (onLineOne 7 27)
        afterRange |> shouldEqual (onLineOne 27 33)
    | _ -> Assert.Fail "Expected paragraph with literal + direct link + literal"
1229+
1230+
[<Test>]
1231+
let ``Inline code and subsequent literal have correct column ranges`` () =
    // Column ruler for the input text:
    //           1
    // 0123456789012
    // foo `bar` baz

    // Every expected range in this test sits on line 1; only the columns differ.
    let onLineOne startColumn endColumn =
        Some
            { StartLine = 1
              StartColumn = startColumn
              EndLine = 1
              EndColumn = endColumn }

    let doc = "foo `bar` baz" |> Markdown.Parse

    match doc.Paragraphs with
    | [ Paragraph([ Literal("foo ", litRange1); InlineCode("bar", codeRange); Literal(" baz", litRange2) ], _) ] ->
        litRange1 |> shouldEqual (onLineOne 0 4)
        // The code range covers only the text between the backticks.
        codeRange |> shouldEqual (onLineOne 5 8)
        // The trailing literal starts right after the closing backtick.
        litRange2 |> shouldEqual (onLineOne 9 13)
    | _ -> Assert.Fail "Expected paragraph with literal + inline code + literal"

// Header of the pre-existing test whose body follows. It must stay a separate test:
// without this attribute and binding, the "<pre link=...>" statements below become
// the tail of the inline-code test above and lose their own name in test reports.
[<Test>]
let ``Don't replace links in generated code block`` () =
1264+
11341265
let doc = "<pre link=\"valid link though.md\">content</pre>"
11351266
let mdlinkResolver _ = failwith "should not be reached!"
11361267
let actual = "<pre link=\"valid link though.md\">content</pre>\r\n" |> properNewLines

0 commit comments

Comments
 (0)