Skip to content

Commit 1aa0f63

Browse files
eamonnmcmanus authored and google-java-format Team committed
Initial support for fenced code blocks in Markdown Javadoc.
This adds more complexity to the already-complex logic in `JavadocWriter` than I would like. But it passes tests, and removes one of the main areas where Markdown comments would be mangled.

PiperOrigin-RevId: 895567035
1 parent ffd5425 commit 1aa0f63

File tree

7 files changed

+232
-36
lines changed

7 files changed

+232
-36
lines changed

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
4141
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
4242
import com.google.googlejavaformat.java.javadoc.Token.Literal;
43+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
4344
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
4445
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
4546
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
@@ -134,6 +135,7 @@ private static String render(List<Token> input, int blockIndent, boolean classic
134135
case ParagraphCloseTag unused -> {}
135136
case ListItemCloseTag unused -> {}
136137
case OptionalLineBreak unused -> {}
138+
case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
137139
}
138140
}
139141
throw new AssertionError();

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -127,11 +127,21 @@ private ImmutableList<Token> generateTokens() throws LexException {
127127
tokens.add(token);
128128

129129
while (!input.isExhausted()) {
130-
for (Token markdownToken : markdownPositions.tokensAt(input.position())) {
131-
boolean consumed = input.tryConsume(markdownToken.value());
132-
verify(consumed, "Did not consume markdown token: %s", markdownToken);
133-
var unused = input.readAndResetRecorded();
134-
tokens.add(markdownToken);
130+
boolean moreMarkdown;
131+
do {
132+
moreMarkdown = false;
133+
for (Token markdownToken : markdownPositions.tokensAt(input.position())) {
134+
tokens.add(markdownToken);
135+
if (!markdownToken.value().isEmpty()) {
136+
boolean consumed = input.tryConsume(markdownToken.value());
137+
verify(consumed, "Did not consume markdown token: %s", markdownToken);
138+
var unused = input.readAndResetRecorded();
139+
moreMarkdown = true;
140+
}
141+
}
142+
} while (moreMarkdown);
143+
if (input.isExhausted()) {
144+
break;
135145
}
136146
token = readToken();
137147
tokens.add(token);

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java

Lines changed: 52 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import static com.google.googlejavaformat.java.javadoc.JavadocWriter.RequestedWhitespace.NONE;
2424
import static com.google.googlejavaformat.java.javadoc.JavadocWriter.RequestedWhitespace.WHITESPACE;
2525

26+
import com.google.googlejavaformat.java.javadoc.Token.BrTag;
2627
import com.google.googlejavaformat.java.javadoc.Token.CodeCloseTag;
2728
import com.google.googlejavaformat.java.javadoc.Token.CodeOpenTag;
2829
import com.google.googlejavaformat.java.javadoc.Token.FooterJavadocTagStart;
@@ -33,6 +34,7 @@
3334
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
3435
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
3536
import com.google.googlejavaformat.java.javadoc.Token.Literal;
37+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
3638
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
3739
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
3840
import com.google.googlejavaformat.java.javadoc.Token.PreCloseTag;
@@ -72,6 +74,7 @@ final class JavadocWriter {
7274
private Token requestedMoeBeginStripComment;
7375
private int indentForMoeEndStripComment;
7476
private boolean wroteAnythingSignificant;
77+
private boolean justOutputBlankLine;
7578

7679
JavadocWriter(int blockIndent, boolean classicJavadoc) {
7780
this.blockIndent = blockIndent;
@@ -193,10 +196,13 @@ void writeListOpen(ListOpenTag token) {
193196
continuingListStack.push(indent);
194197
postWriteModifiedContinuingListStack.push();
195198

196-
requestNewline();
199+
if (!justOutputBlankLine) {
200+
requestNewline();
201+
}
197202
}
198203

199204
void writeListClose(ListCloseTag token) {
205+
System.err.printf("@@@ writeListClose\n");
200206
if (classicJavadoc) {
201207
requestNewline();
202208
}
@@ -212,7 +218,9 @@ void writeListClose(ListCloseTag token) {
212218
}
213219

214220
void writeListItemOpen(ListItemOpenTag token) {
215-
requestNewline();
221+
if (!justOutputBlankLine) {
222+
requestNewline();
223+
}
216224

217225
if (continuingListItemOfInnermostList) {
218226
continuingListItemOfInnermostList = false;
@@ -310,7 +318,7 @@ void writeHtmlComment(HtmlComment token) {
310318
requestNewline();
311319
}
312320

313-
void writeBr(Token token) {
321+
void writeBr(BrTag token) {
314322
writeToken(token);
315323

316324
requestNewline();
@@ -324,6 +332,42 @@ void writeLiteral(Literal token) {
324332
writeToken(token);
325333
}
326334

335+
private void flushWhitespace() {
336+
if (requestedMoeBeginStripComment != null) {
337+
requestNewline();
338+
}
339+
340+
if (classicJavadoc
341+
&& requestedWhitespace == BLANK_LINE
342+
&& (!postWriteModifiedContinuingListStack.isEmpty() || continuingFooterTag)) {
343+
requestedWhitespace = NEWLINE;
344+
}
345+
346+
if (requestedWhitespace == BLANK_LINE) {
347+
writeBlankLine();
348+
requestedWhitespace = NONE;
349+
} else if (requestedWhitespace == NEWLINE) {
350+
writeNewline();
351+
requestedWhitespace = NONE;
352+
}
353+
}
354+
355+
void writeMarkdownFencedCodeBlock(MarkdownFencedCodeBlock token) {
356+
flushWhitespace();
357+
output.append(token.start());
358+
token
359+
.literal()
360+
.lines()
361+
.forEach(
362+
line -> {
363+
writeNewline();
364+
output.append(line);
365+
});
366+
writeNewline();
367+
output.append(token.end());
368+
requestBlankLine();
369+
}
370+
327371
@Override
328372
public String toString() {
329373
return output.toString();
@@ -351,29 +395,7 @@ enum RequestedWhitespace {
351395
}
352396

353397
private void writeToken(Token token) {
354-
if (requestedMoeBeginStripComment != null) {
355-
requestNewline();
356-
}
357-
358-
if (requestedWhitespace == BLANK_LINE
359-
&& (!postWriteModifiedContinuingListStack.isEmpty() || continuingFooterTag)) {
360-
/*
361-
* We don't write blank lines inside lists or footer tags, even in cases where we otherwise
362-
* would (e.g., before a <p> tag). Justification: We don't write blank lines _between_ list
363-
* items or footer tags, so it would be strange to write blank lines _within_ one. Of course,
364-
* an alternative approach would be to go ahead and write blank lines between items/tags,
365-
* either always or only in the case that an item contains a blank line.
366-
*/
367-
requestedWhitespace = NEWLINE;
368-
}
369-
370-
if (requestedWhitespace == BLANK_LINE) {
371-
writeBlankLine();
372-
requestedWhitespace = NONE;
373-
} else if (requestedWhitespace == NEWLINE) {
374-
writeNewline();
375-
requestedWhitespace = NONE;
376-
}
398+
flushWhitespace();
377399
boolean needWhitespace = (requestedWhitespace == WHITESPACE);
378400

379401
/*
@@ -415,6 +437,9 @@ private void writeToken(Token token) {
415437
* http://denisbider.blogspot.com/2015/09/when-monospace-fonts-arent-unicode.html
416438
*/
417439
remainingOnLine -= token.length();
440+
if (!token.value().isEmpty()) {
441+
justOutputBlankLine = false;
442+
}
418443
requestedWhitespace = NONE;
419444
wroteAnythingSignificant = true;
420445
}
@@ -428,6 +453,7 @@ private void writeNewlineStart() {
428453
private void writeBlankLine() {
429454
writeNewlineStart();
430455
writeNewline();
456+
justOutputBlankLine = true;
431457
}
432458

433459
private void writeNewline() {

core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,13 @@
2525
import com.google.googlejavaformat.java.javadoc.Token.ListItemCloseTag;
2626
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
2727
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
28+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
2829
import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag;
2930
import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag;
3031
import java.util.regex.Matcher;
3132
import java.util.regex.Pattern;
3233
import org.commonmark.node.BulletList;
34+
import org.commonmark.node.FencedCodeBlock;
3335
import org.commonmark.node.Heading;
3436
import org.commonmark.node.ListItem;
3537
import org.commonmark.node.Node;
@@ -102,6 +104,25 @@ void visit(Node node) {
102104
visitNodeList(paragraph.getNext());
103105
}
104106
}
107+
case FencedCodeBlock fencedCodeBlock -> {
108+
// Any indentation before the code block is part of FencedCodeBlock. This makes sense
109+
// because the lines inside the code block must also be indented by that amount. That
110+
// indentation gets subtracted from FencedCodeBlock.getLiteral(), which is the actual text
111+
// represented by the code block.
112+
int start = startPosition(fencedCodeBlock) + fencedCodeBlock.getFenceIndent();
113+
MarkdownFencedCodeBlock token =
114+
new MarkdownFencedCodeBlock(
115+
input.substring(start, endPosition(fencedCodeBlock)),
116+
fencedCodeBlock
117+
.getFenceCharacter()
118+
.repeat(fencedCodeBlock.getOpeningFenceLength())
119+
+ fencedCodeBlock.getInfo(),
120+
fencedCodeBlock
121+
.getFenceCharacter()
122+
.repeat(fencedCodeBlock.getClosingFenceLength()),
123+
fencedCodeBlock.getLiteral());
124+
positionToToken.get(start).addLast(token);
125+
}
105126
// TODO: others
106127
default -> {}
107128
}
@@ -131,12 +152,17 @@ private void visitNodeList(Node node) {
131152
*/
132153
private void addSpan(Node node, Token startToken, Token endToken) {
133154
// We could write the first part more simply as a `put`, but we do it this way for symmetry.
134-
var first = node.getSourceSpans().getFirst();
135-
int startPosition = first.getInputIndex();
136-
positionToToken.get(startPosition).addLast(startToken);
155+
positionToToken.get(startPosition(node)).addLast(startToken);
156+
positionToToken.get(endPosition(node)).addFirst(endToken);
157+
}
158+
159+
private int startPosition(Node node) {
160+
return node.getSourceSpans().getFirst().getInputIndex();
161+
}
162+
163+
private int endPosition(Node node) {
137164
var last = node.getSourceSpans().getLast();
138-
int endPosition = last.getInputIndex() + last.getLength();
139-
positionToToken.get(endPosition).addFirst(endToken);
165+
return last.getInputIndex() + last.getLength();
140166
}
141167
}
142168

core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,27 @@ record HtmlComment(String value) implements Token {}
103103

104104
record BrTag(String value) implements Token {}
105105

106+
/**
107+
* A fenced code block, like
108+
*
109+
* <pre>
110+
* ```java
111+
* code block
112+
* with an info string ("java")
113+
* ```
114+
* </pre>
115+
*
116+
* @param value the full text of the code block as it appeared in the input, including the start
117+
* and end fences and the literal content.
118+
* @param start the start fence, including the info string if any ({@code ```java} in the
119+
* example).
120+
* @param end the end fence.
121+
* @param literal the text that the code block represents. This does not include the start and end
122+
* fences, nor any indentation that precedes these fences and every intervening line.
123+
*/
124+
record MarkdownFencedCodeBlock(String value, String start, String end, String literal)
125+
implements Token {}
126+
106127
/**
107128
* Whitespace that is not in a {@code <pre>} or {@code <table>} section. Whitespace includes
108129
* leading newlines, asterisks, and tabs and spaces. In the output, it is translated to newlines

core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1717,6 +1717,60 @@ class Test {}
17171717
///
17181718
/// A following paragraph.
17191719
class Test {}
1720+
""";
1721+
doFormatTest(input, expected);
1722+
}
1723+
1724+
@Test
1725+
public void markdownFencedCodeBlocks() {
1726+
assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
1727+
// If fenced code blocks are not supported correctly, the contents of each one will be joined.
1728+
// If the input lines survive as separate lines, that means we identified the code block.
1729+
String input =
1730+
"""
1731+
/// ```
1732+
/// foo
1733+
/// bar
1734+
/// ```
1735+
///
1736+
/// - ```
1737+
/// code block
1738+
/// in a list
1739+
/// ```
1740+
///
1741+
/// ~~~java
1742+
/// code block
1743+
/// with tildes and an info string ("java")
1744+
/// ~~~
1745+
///
1746+
/// ````
1747+
/// code block
1748+
/// with more than three backticks and an extra leading space
1749+
/// ````
1750+
class Test {}
1751+
""";
1752+
String expected =
1753+
"""
1754+
/// ```
1755+
/// foo
1756+
/// bar
1757+
/// ```
1758+
///
1759+
/// - ```
1760+
/// code block
1761+
/// in a list
1762+
/// ```
1763+
///
1764+
/// ~~~java
1765+
/// code block
1766+
/// with tildes and an info string ("java")
1767+
/// ~~~
1768+
///
1769+
/// ````
1770+
/// code block
1771+
/// with more than three backticks and an extra leading space
1772+
/// ````
1773+
class Test {}
17201774
""";
17211775
doFormatTest(input, expected);
17221776
}

0 commit comments

Comments
 (0)