Skip to content

Commit 93aab4f

Browse files
authored
fix(tostring): respect CommonMark flanking for word-attached emphasis (#506)
`tostring()` computes the on-screen width of inline markdown, used to size table columns. Its `*`/`**` rules were gated by an `at_valid` guard that only accepts emphasis at the start of the string or after whitespace, so word-attached markers such as `I**'ll finish**` or `x**bold**` were treated as literal text. The treesitter renderer, however, conceals those markers per CommonMark's left/right-flanking delimiter rules, so the computed width was larger than what is drawn and table borders drifted to the right. Replace the `at_valid` guard for `*` emphasis with proper left/right-flanking predicates that mirror treesitter's decision. Underscore (`_`) keeps the `at_valid` guard, since CommonMark forbids intra-word `_` emphasis (e.g. `snake_case`), which that guard already approximates. Add test/tostring_word_attached_emphasis.md, a visual fixture (matching test/tostring_recursion.md) whose table borders only line up with this fix.
1 parent 3d8f688 commit 93aab4f

2 files changed

Lines changed: 84 additions & 5 deletions

File tree

lua/markview/renderers/markdown/tostring.lua

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -661,21 +661,59 @@ local at_start = lpeg.P(function (_, i) return i == 1; end);
661661
local after_sp = lpeg.B(lpeg.S(" \t"));
662662
local at_valid = (at_start + after_sp);
663663

664+
-- CommonMark flanking rules for `*` emphasis.
665+
--
666+
-- The `at_valid` guard (start-of-string or after whitespace) used by the
667+
-- underscore variants treats word-attached `*`/`**` as literal text, e.g.
668+
-- `I**'ll finish**`. The treesitter renderer, however, follows CommonMark's
669+
-- left/right-flanking delimiter rules and conceals such markers, so the width
670+
-- computed here disagreed with what is drawn and table borders drifted right.
671+
-- These predicates mirror treesitter's flanking decision for `*`.
672+
--
673+
-- Underscore (`_`) keeps the `at_valid` guard: CommonMark forbids intra-word
674+
-- `_` emphasis (e.g. `snake_case`), which `at_valid` already approximates.
675+
-- Building blocks shared by the flanking predicates.

-- The 32 ASCII punctuation characters.
local punct = lpeg.S("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");

-- Horizontal whitespace: a space or a tab.
local space = lpeg.S(" \t");

-- Start of the subject string (alias of `at_start`).
local sol = at_start;

-- End of the subject string: succeeds only when no character is left.
local eol = lpeg.P(-1);
679+
680+
-- Builds the pattern for a delimiter run that is allowed to OPEN emphasis,
-- i.e. a left-flanking run:
--
--   * the run must be followed by a character that is not whitespace, and
--   * that character must not be punctuation, unless the run itself sits at
--     the start of the string or right after whitespace/punctuation.
--
-- `run` is the literal delimiter pattern (e.g. `lpeg.P("**")`). Only `run`
-- consumes input; the surrounding checks are zero-width look-behind /
-- look-ahead assertions. Because the look-aheads need an actual character,
-- a run at the very end of the string can never open emphasis.
local function flank_open (run)
	local any = lpeg.P(1);

	-- Next character is neither whitespace nor punctuation.
	local next_is_plain = #(any - (space + punct));
	-- Next character is anything but whitespace.
	local next_is_visible = #(any - space);
	-- Start of string, or previous character is whitespace/punctuation.
	local prev_is_boundary = sol + lpeg.B(space + punct);

	local plain_opener = run * next_is_plain;
	local boundary_opener = prev_is_boundary * run * next_is_visible;

	return plain_opener + boundary_opener;
end
691+
692+
-- Builds the pattern for a delimiter run that is allowed to CLOSE emphasis,
-- i.e. a right-flanking run:
--
--   * the run must be preceded by a character that is not whitespace, and
--   * that character must not be punctuation, unless the run is followed by
--     whitespace, punctuation or the end of the string.
--
-- `run` is the literal delimiter pattern (e.g. `lpeg.P("**")`). Only `run`
-- consumes input; the surrounding checks are zero-width look-behind /
-- look-ahead assertions. Because the look-behinds need an actual character,
-- a run at the very start of the string can never close emphasis.
local function flank_close (run)
	local any = lpeg.P(1);

	-- Previous character is neither whitespace nor punctuation.
	local prev_is_plain = lpeg.B(any - (space + punct));
	-- Previous character is anything but whitespace.
	local prev_is_visible = lpeg.B(any - space);
	-- Next character is whitespace/punctuation, or nothing is left.
	local next_is_boundary = #(space + punct) + eol;

	local plain_closer = prev_is_plain * run;
	local boundary_closer = prev_is_visible * run * next_is_boundary;

	return plain_closer + boundary_closer;
end
701+
664702
local s_italic_content = lpeg.P("\\*") + ( 1 - lpeg.P("*") );
665703
local u_italic_content = lpeg.P("\\_") + ( 1 - lpeg.P("_") );
666-
local s_italic = lpeg.C( lpeg.P("*") * s_italic_content^1 * lpeg.P("*") ) / md_str.italic;
704+
local s_italic = lpeg.C( flank_open(lpeg.P("*")) * s_italic_content^1 * flank_close(lpeg.P("*")) ) / md_str.italic;
667705
local u_italic = lpeg.C( lpeg.P("_") * u_italic_content^1 * lpeg.P("_") ) / md_str.italic;
668-
local italic = at_valid * (s_italic + u_italic);
706+
local italic = s_italic + (at_valid * u_italic);
669707

670708
local s_bold_content = lpeg.P("\\*") + ( 1 - lpeg.P("*") );
671709
local u_bold_content = lpeg.P("\\_") + ( 1 - lpeg.P("_") );
672-
local s_bold = lpeg.C( lpeg.P("**") * s_bold_content^1 * lpeg.P("**") ) / md_str.bold;
710+
local s_bold = lpeg.C( flank_open(lpeg.P("**")) * s_bold_content^1 * flank_close(lpeg.P("**")) ) / md_str.bold;
673711
local u_bold = lpeg.C( lpeg.P("__") * u_bold_content^1 * lpeg.P("__") ) / md_str.bold;
674-
local bold = at_valid * (s_bold + u_bold);
712+
local bold = s_bold + (at_valid * u_bold);
675713

676714
local s_bold_italic_content = lpeg.P("\\*") + ( 1 - lpeg.P("*") );
677715
local u_bold_italic_content = lpeg.P("\\_") + ( 1 - lpeg.P("_") );
678-
local s_bold_italic = lpeg.C( lpeg.P("*")^3 * s_bold_italic_content^1 * lpeg.P("*")^3 ) / md_str.bold_italic;
716+
local s_bold_italic = lpeg.C( flank_open(lpeg.P("*")^3) * s_bold_italic_content^1 * flank_close(lpeg.P("*")^3) ) / md_str.bold_italic;
679717
local u_bold_italic = lpeg.C( lpeg.P("_")^3 * u_bold_italic_content^1 * lpeg.P("_")^3 ) / md_str.bold_italic;
680718
local bold_italic = s_bold_italic + u_bold_italic;
681719

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; Word-attached emphasis in tables — tostring column width
2+
3+
Emphasis markers glued to a word (`x**bold**`, `I**'ll**`) are concealed by the
4+
renderer per CommonMark flanking rules, so the column-width calculation in
5+
`renderers/markdown/tostring.lua` must account for them too. If it doesn't, the
6+
calculated width is larger than what is drawn and the right border drifts.
7+
8+
Open this file and check that every right border lines up.
9+
10+
### Bold glued to a word
11+
12+
| Case | Example |
13+
|-------------------------------|----------------------|
14+
| Before a letter | x**bold** end |
15+
| Before punctuation | I**'ll finish** soon |
16+
| Before punctuation | I**'d be** here |
17+
| After a word + space (normal) | a **bold** b |
18+
| Plain (control) | just normal text |
19+
20+
### Italic glued to a word
21+
22+
| Case | Example |
23+
|----------------------------|-----------|
24+
| Italic before punctuation | I*'ll* go |
25+
| Intra-word italic | 2*3*4 ok |
26+
| Plain italic (control) | a *it* b |
27+
28+
### Underscore stays literal (no intra-word emphasis)
29+
30+
| Case | Example |
31+
|------------------|----------------------|
32+
| snake_case | a snake_case_id here |
33+
| Plain (control) | just normal text |
34+
35+
### Conditionals cheat sheet (real-world)
36+
37+
| Type | Example |
38+
|-------|-------------------------------------------------------------|
39+
| 1st | If you **help** me, I**'ll finish** sooner. |
40+
| Mixed | If I **had studied** medicine, I**'d be** a doctor **now**. |
41+
| Mixed | If I **weren't** so shy, I**'d have spoken** up yesterday. |

0 commit comments

Comments
 (0)