google
diff --git a/‎core/pom.xml‎
Lines changed: 5 additions & 0 deletions b/‎core/pom.xml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎core/src/main/java/com/google/googlejavaformat/java/JavaCommentsHelper.java‎
Lines changed: 7 additions & 1 deletion b/‎core/src/main/java/com/google/googlejavaformat/java/JavaCommentsHelper.java‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎core/src/main/java/com/google/googlejavaformat/java/JavaInput.java‎
Lines changed: 7 additions & 3 deletions b/‎core/src/main/java/com/google/googlejavaformat/java/JavaInput.java‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎core/src/main/java/com/google/googlejavaformat/java/javadoc/CharStream.java‎
Lines changed: 11 additions & 7 deletions b/‎core/src/main/java/com/google/googlejavaformat/java/javadoc/CharStream.java‎
Lines changed: 11 additions & 7 deletions
diff --git a/‎core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java‎
Lines changed: 50 additions & 8 deletions b/‎core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java‎
Lines changed: 50 additions & 8 deletions
diff --git a/‎core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java‎
Lines changed: 59 additions & 29 deletions b/‎core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java‎
Lines changed: 59 additions & 29 deletions
@@ -39,6 +39,11 @@
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.commonmark</groupId>
+      <artifactId>commonmark</artifactId>
+      <version>0.28.0</version>
+    </dependency>
 
     <!-- Compile-time dependencies -->
     <dependency>
 
@@ -50,7 +50,13 @@ public String rewrite(Tok tok, int maxWidth, int column0) {
     }
     String text = tok.getOriginalText();
     if (tok.isJavadocComment() && options.formatJavadoc()) {
-      text = JavadocFormatter.formatJavadoc(text, column0);
+      if (text.startsWith("///")) {
+        if (markdownJavadocPositions.contains(tok.getPosition())) {
+          return JavadocFormatter.formatJavadoc(text, column0);
+        }
+      } else {
+        text = JavadocFormatter.formatJavadoc(text, column0);
+      }
     }
     List<String> lines = new ArrayList<>();
     Iterator<String> it = Newlines.lineIterator(text);
 
@@ -160,9 +160,13 @@ public boolean isSlashStarComment() {
 
     @Override
     public boolean isJavadocComment() {
-      // comments like `/***` are also javadoc, but their formatting probably won't be improved
-      // by the javadoc formatter
-      return text.startsWith("/**") && text.charAt("/**".length()) != '*' && text.length() > 4;
+      // comments like `/***` or `////` are also javadoc, but their formatting probably won't be
+      // improved by the javadoc formatter
+      return ((text.startsWith("/**") && !text.startsWith("/***"))
+              || (Runtime.version().feature() >= 23
+                  && text.startsWith("///")
+                  && !text.startsWith("////")))
+          && text.length() > 4;
     }
 
     @Override
 
@@ -27,18 +27,18 @@
  */
 final class CharStream {
   private final String input;
-  private int start;
+  private int position;
   private int tokenEnd = -1; // Negative value means no token, and will cause an exception if used.
 
   CharStream(String input) {
     this.input = checkNotNull(input);
   }
 
   boolean tryConsume(String expected) {
-    if (!input.startsWith(expected, start)) {
+    if (!input.startsWith(expected, position)) {
       return false;
     }
-    tokenEnd = start + expected.length();
+    tokenEnd = position + expected.length();
     return true;
   }
 
@@ -48,7 +48,7 @@ boolean tryConsume(String expected) {
    * @param pattern the pattern to search for, which must be anchored to match only at position 0
    */
   boolean tryConsumeRegex(Pattern pattern) {
-    Matcher matcher = pattern.matcher(input).region(start, input.length());
+    Matcher matcher = pattern.matcher(input).region(position, input.length());
     if (!matcher.lookingAt()) {
       return false;
     }
@@ -57,13 +57,17 @@ boolean tryConsumeRegex(Pattern pattern) {
   }
 
   String readAndResetRecorded() {
-    String result = input.substring(start, tokenEnd);
-    start = tokenEnd;
+    String result = input.substring(position, tokenEnd);
+    position = tokenEnd;
     tokenEnd = -1;
     return result;
   }
 
   boolean isExhausted() {
-    return start == input.length();
+    return position == input.length();
+  }
+
+  int position() {
+    return position;
   }
 }
@@ -14,12 +14,15 @@
 
 package com.google.googlejavaformat.java.javadoc;
 
+import static com.google.common.base.Preconditions.checkState;
 import static com.google.googlejavaformat.java.javadoc.JavadocLexer.lex;
 import static com.google.googlejavaformat.java.javadoc.Token.Type.BR_TAG;
 import static com.google.googlejavaformat.java.javadoc.Token.Type.PARAGRAPH_OPEN_TAG;
 import static java.util.regex.Pattern.CASE_INSENSITIVE;
 import static java.util.regex.Pattern.compile;
+import static java.util.stream.Collectors.joining;
 
+import com.google.common.base.CharMatcher;
 import com.google.common.collect.ImmutableList;
 import com.google.googlejavaformat.java.javadoc.JavadocLexer.LexException;
 import java.util.List;
@@ -39,22 +42,37 @@ public final class JavadocFormatter {
   static final int MAX_LINE_LENGTH = 100;
 
   /**
-   * Formats the given Javadoc comment, which must start with ∕✱✱ and end with ✱∕. The output will
-   * start and end with the same characters.
+   * Formats the given Javadoc comment. A classic Javadoc comment must start with ∕✱✱ and end with
+   * ✱∕, and the output will start and end with the same characters. A Markdown Javadoc comment
+   * consists of lines each of which starts with ///, and the output will also consist of such
+   * lines.
    */
   public static String formatJavadoc(String input, int blockIndent) {
+    boolean classicJavadoc =
+        switch (input) {
+          case String s when s.startsWith("/**") -> true;
+          case String s when s.startsWith("///") -> false;
+          default ->
+              throw new IllegalArgumentException("Input does not start with /** or ///: " + input);
+        };
+    // Lex a normalized copy rather than overwriting `input`: the LexException fallback below
+    // returns `input`, which must still be the original comment with /// on every line.
+    String toLex = classicJavadoc ? input : "///" + markdownCommentText(input);
     ImmutableList<Token> tokens;
     try {
-      tokens = lex(input);
+      tokens = lex(toLex, classicJavadoc);
     } catch (LexException e) {
       return input;
     }
-    String result = render(tokens, blockIndent);
-    return makeSingleLineIfPossible(blockIndent, result);
+    String result = render(tokens, blockIndent, classicJavadoc);
+    if (classicJavadoc) {
+      result = makeSingleLineIfPossible(blockIndent, result);
+    }
+    return result;
   }
 
-  private static String render(List<Token> input, int blockIndent) {
-    JavadocWriter output = new JavadocWriter(blockIndent);
+  private static String render(List<Token> input, int blockIndent, boolean classicJavadoc) {
+    JavadocWriter output = new JavadocWriter(blockIndent, classicJavadoc);
     for (Token token : input) {
       switch (token.type()) {
         case BEGIN_JAVADOC -> output.writeBeginJavadoc();
@@ -137,12 +155,36 @@ private static boolean oneLineJavadoc(String line, int blockIndent) {
       return false;
     }
     // If the javadoc contains only a tag, use multiple lines to encourage writing a summary
-    // fragment, unless it's /* @hide */.
+    // fragment, unless it's /** @hide */.
     if (line.startsWith("@") && !line.equals("@hide")) {
       return false;
     }
     return true;
   }
 
+  private static final CharMatcher NOT_SPACE_OR_TAB = CharMatcher.noneOf(" \t");
+
+  /**
+   * Strips each line of {@code input} through its first ///, then removes the leading spaces and
+   * tabs common to all non-blank lines, and rejoins the lines with \n, ready for a Markdown
+   * parser. Throws {@link IllegalStateException} if a line has no ///.
+   */
+  private static String markdownCommentText(String input) {
+    List<String> lines =
+        input
+            .lines()
+            .map(
+                line -> {
+                  checkState(line.contains("///"), "Line does not contain ///: %s", line);
+                  return line.substring(line.indexOf("///") + 3);
+                })
+            .toList();
+    int leadingSpace =
+        lines.stream().mapToInt(NOT_SPACE_OR_TAB::indexIn).filter(i -> i >= 0).min().orElse(0);
+    return lines.stream()
+        .map(line -> line.length() < leadingSpace ? "" : line.substring(leadingSpace))
+        .collect(joining("\n"));
+  }
+
   private JavadocFormatter() {}
 }
@@ -55,7 +55,6 @@
 import com.google.common.base.CharMatcher;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.PeekingIterator;
-import com.google.googlejavaformat.java.javadoc.Token.Type;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Deque;
@@ -65,15 +64,24 @@
 /** Lexer for the Javadoc formatter. */
 final class JavadocLexer {
   /** Takes a Javadoc comment, including ∕✱✱ and ✱∕, and returns tokens, including ∕✱✱ and ✱∕. */
-  static ImmutableList<Token> lex(String input) throws LexException {
-    /*
-     * TODO(cpovirk): In theory, we should interpret Unicode escapes (yet output them in their
-     * original form). This would mean mean everything from an encoded ∕✱✱ to an encoded <pre> tag,
-     * so we'll probably never bother.
-     */
-    input = stripJavadocBeginAndEnd(input);
+  static ImmutableList<Token> lex(String input, boolean classicJavadoc) throws LexException {
+    MarkdownPositions markdownPositions;
+    if (classicJavadoc) {
+      /*
+       * TODO(cpovirk): In theory, we should interpret Unicode escapes (yet output them in their
+       * original form). This would mean everything from an encoded ∕✱✱ to an encoded <pre>
+       * tag, so we'll probably never bother.
+       */
+      input = stripJavadocBeginAndEnd(input);
+      markdownPositions = MarkdownPositions.EMPTY;
+    } else {
+      checkArgument(input.startsWith("///"));
+      input = input.substring("///".length());
+      markdownPositions = MarkdownPositions.parse(input);
+    }
     input = normalizeLineEndings(input);
-    return new JavadocLexer(new CharStream(input)).generateTokens();
+    return new JavadocLexer(new CharStream(input), markdownPositions, classicJavadoc)
+        .generateTokens();
   }
 
   /** The lexer crashes on windows line endings, so for now just normalize to `\n`. */
@@ -95,56 +103,65 @@ private static String stripJavadocBeginAndEnd(String input) {
   }
 
   private final CharStream input;
+  private final boolean classicJavadoc;
+  private final MarkdownPositions markdownPositions;
   private final NestingStack braceStack = new NestingStack();
   private final NestingStack preStack = new NestingStack();
   private final NestingStack codeStack = new NestingStack();
   private final NestingStack tableStack = new NestingStack();
   private boolean outerInlineTagIsSnippet;
   private boolean somethingSinceNewline;
 
-  private JavadocLexer(CharStream input) {
+  private JavadocLexer(
+      CharStream input, MarkdownPositions markdownPositions, boolean classicJavadoc) {
     this.input = checkNotNull(input);
+    this.markdownPositions = markdownPositions;
+    this.classicJavadoc = classicJavadoc;
   }
 
   private ImmutableList<Token> generateTokens() throws LexException {
     ImmutableList.Builder<Token> tokens = ImmutableList.builder();
 
-    Token token = new Token(BEGIN_JAVADOC, "/**");
+    Token token = new Token(BEGIN_JAVADOC, classicJavadoc ? "/**" : "///");
     tokens.add(token);
 
     while (!input.isExhausted()) {
+      tokens.addAll(markdownPositions.tokensAt(input.position()));
       token = readToken();
       tokens.add(token);
     }
 
     checkMatchingTags();
 
-    token = new Token(END_JAVADOC, "*/");
+    token = new Token(END_JAVADOC, classicJavadoc ? "*/" : "");
     tokens.add(token);
 
     ImmutableList<Token> result = tokens.build();
     result = joinAdjacentLiteralsAndAdjacentWhitespace(result);
-    result = inferParagraphTags(result);
+    if (classicJavadoc) {
+      result = inferParagraphTags(result);
+    }
     result = optionalizeSpacesAfterLinks(result);
     result = deindentPreCodeBlocks(result);
     return result;
   }
 
   private Token readToken() throws LexException {
-    Type type = consumeToken();
+    Token.Type type = consumeToken();
     String value = input.readAndResetRecorded();
     return new Token(type, value);
   }
 
-  private Type consumeToken() throws LexException {
+  private Token.Type consumeToken() throws LexException {
     boolean preserveExistingFormatting = preserveExistingFormatting();
 
-    if (input.tryConsumeRegex(NEWLINE_PATTERN)) {
+    Pattern newlinePattern = classicJavadoc ? CLASSIC_NEWLINE_PATTERN : MARKDOWN_NEWLINE_PATTERN;
+    if (input.tryConsumeRegex(newlinePattern)) {
       somethingSinceNewline = false;
       return preserveExistingFormatting ? FORCED_NEWLINE : WHITESPACE;
     } else if (input.tryConsume(" ") || input.tryConsume("\t")) {
       // TODO(cpovirk): How about weird whitespace chars? Ideally we'd distinguish breaking vs. not.
-      // Returning LITERAL here prevent us from breaking a <pre> line. For more info, see LITERAL.
+      // Returning LITERAL here prevents us from breaking a <pre> line. For more info, see LITERAL.
       return preserveExistingFormatting ? LITERAL : WHITESPACE;
     }
 
@@ -187,7 +204,7 @@ private Type consumeToken() throws LexException {
 
     // Inside an inline tag, don't do any HTML interpretation.
     if (!braceStack.isEmpty()) {
-      verify(input.tryConsumeRegex(LITERAL_PATTERN));
+      verify(input.tryConsumeRegex(literalPattern()));
       return LITERAL;
     }
 
@@ -216,7 +233,7 @@ private Type consumeToken() throws LexException {
     }
 
     if (preserveExistingFormatting) {
-      verify(input.tryConsumeRegex(LITERAL_PATTERN));
+      verify(input.tryConsumeRegex(literalPattern()));
       return LITERAL;
     }
 
@@ -248,7 +265,7 @@ private Type consumeToken() throws LexException {
       return MOE_END_STRIP_COMMENT;
     } else if (input.tryConsumeRegex(HTML_COMMENT_PATTERN)) {
       return HTML_COMMENT;
-    } else if (input.tryConsumeRegex(LITERAL_PATTERN)) {
+    } else if (input.tryConsumeRegex(literalPattern())) {
       return LITERAL;
     }
     throw new AssertionError();
@@ -274,7 +291,7 @@ private void checkMatchingTags() throws LexException {
    * Join together adjacent literal tokens, and join together adjacent whitespace tokens.
    *
    * <p>For literal tokens, this means something like {@code ["<b>", "foo", "</b>"] =>
-   * ["<b>foo</b>"]}. See {@link #LITERAL_PATTERN} for discussion of why those tokens are separate
+   * ["<b>foo</b>"]}. See {@link #literalPattern()} for discussion of why those tokens are separate
    * to begin with.
    *
    * <p>Whitespace tokens are treated analogously. We don't really "want" to join whitespace tokens,
@@ -514,7 +531,8 @@ private static boolean hasMultipleNewlines(String s) {
    * We'd remove the trailing whitespace later on (in JavaCommentsHelper.rewrite), but I feel safer
    * stripping it now: It otherwise might confuse our line-length count, which we use for wrapping.
    */
-  private static final Pattern NEWLINE_PATTERN = compile("[ \t]*\n[ \t]*[*]?[ \t]?");
+  private static final Pattern CLASSIC_NEWLINE_PATTERN = compile("[ \t]*\n[ \t]*[*]?[ \t]?");
+  private static final Pattern MARKDOWN_NEWLINE_PATTERN = compile("[ \t]*\n[ \t]*");
 
   // We ensure elsewhere that we match this only at the beginning of a line.
   // Only match tags that start with a lowercase letter, to avoid false matches on unescaped
@@ -545,17 +563,29 @@ private static boolean hasMultipleNewlines(String s) {
   private static final Pattern BR_PATTERN = openTagPattern("br");
   private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b");
   private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*");
+
   /*
    * We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo<p>" (~400
-   * hits in Google-internal code). We will join unnecessarily split "words" (like "foo<b>bar</b>")
-   * in a later step. There's a similar story for braces. I'm not sure I actually need to exclude @
-   * or *. TODO(cpovirk): Try removing them.
+   * hits in Google-internal code).
    *
-   * Thanks to the "rejoin" step in joinAdjacentLiteralsAndAdjacentWhitespace(), we could get away
-   * with matching only one character here. That would eliminate the need for the regex entirely.
-   * That might be faster or slower than what we do now.
+   * TODO(cpovirk): might not need to exclude @ or *.
    */
-  private static final Pattern LITERAL_PATTERN = compile(".[^ \t\n@<{}*]*", DOTALL);
+  private static final Pattern CLASSIC_LITERAL_PATTERN = compile(".[^ \t\n@<{}*]*", DOTALL);
+
+  /*
+   * Many characters have special meaning in Markdown. Rather than list them all, we'll just match
+   * a sequence of alphabetic characters. Even digits can have special meaning, for numbered lists.
+   */
+  private static final Pattern MARKDOWN_LITERAL_PATTERN = compile(".\\p{IsAlphabetic}*", DOTALL);
+
+  /**
+   * The pattern used for "literals", things that do not have any special formatting meaning. This
+   * doesn't have to be a maximal sequence of literal characters, since adjacent literals will be
+   * joined together in a later step.
+   */
+  private Pattern literalPattern() {
+    return classicJavadoc ? CLASSIC_LITERAL_PATTERN : MARKDOWN_LITERAL_PATTERN;
+  }
 
   private static Pattern openTagPattern(String namePattern) {
     return compile(format("<(?:%s)\\b[^>]*>", namePattern), CASE_INSENSITIVE);