Add support for backslash in Markdown Javadoc.

eamonnmcmanus · google-java-format Team · commit ebe9da75a4e4 · 2026-04-23T13:59:03.000-07:00
PiperOrigin-RevId: 904623664
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java
@@ -43,6 +43,7 @@
 import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
 import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
 import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
+import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak;
 import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
 import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
 import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
@@ -133,6 +134,7 @@ private static String render(List<Token> input, int blockIndent, boolean classic
         case BrTag t -> output.writeBr(standardizeBrToken(t));
         case Whitespace unused -> output.requestWhitespace();
         case ForcedNewline unused -> output.writeLineBreakNoAutoIndent();
+        case MarkdownHardLineBreak unused -> output.writeMarkdownHardLineBreak();
         case Literal t -> output.writeLiteral(t);
         case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
         case ListItemCloseTag unused -> {}
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java
@@ -46,6 +46,7 @@
 import com.google.googlejavaformat.java.javadoc.Token.Literal;
 import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
 import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
+import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak;
 import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
 import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
 import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
@@ -238,9 +239,22 @@ private Function<String, Token> consumeToken() throws LexException {
       // remaining characters being matched *could* be those things, so the regex stops at
       // whitespace or a backtick. The *first* character could be a backtick, in constructs like
       // `` `foo` ``, where the backticks adjacent to "foo" are part of the text of the code span.
+      //
+      // Backslash has no special meaning inside `...` so this code precedes the backslash code.
       verify(input.tryConsumeRegex(WORD_IN_CODE_SPAN_PATTERN));
       return Literal::new;
     }
+    if (!classicJavadoc) {
+      // Markdown backslash handling. \ at end of line, optionally followed by whitespace, is a hard
+      // line break. \ elsewhere cancels any special meaning of the following character.
+      if (input.tryConsumeRegex(MARKDOWN_HARD_LINE_BREAK_PATTERN)) {
+        somethingSinceNewline = false;
+        return MarkdownHardLineBreak::new;
+      } else if (input.tryConsumeRegex(BACKSLASH_PLUS_CHARACTER_PATTERN)) {
+        somethingSinceNewline = true;
+        return Literal::new;
+      }
+    }
 
     /*
      * TODO(cpovirk): Maybe try to detect things like "{@code\n@GwtCompatible}" that aren't intended
@@ -661,6 +675,8 @@ private static boolean hasMultipleNewlines(String s) {
   private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b");
   private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*");
   private static final Pattern WORD_IN_CODE_SPAN_PATTERN = compile(".[^ \t\n`]*");
+  private static final Pattern MARKDOWN_HARD_LINE_BREAK_PATTERN = compile("\\\\[ \t]*\n");
+  private static final Pattern BACKSLASH_PLUS_CHARACTER_PATTERN = compile("\\\\.");
 
   /*
    * We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo<p>" (~400
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java
@@ -53,6 +53,9 @@
  * are we inside?"
  */
 final class JavadocWriter {
+
+  private static final Literal BACKSLASH_LITERAL = new Literal("\\");
+
   private final int blockIndent;
   private final boolean classicJavadoc;
   private final StringBuilder output = new StringBuilder();
@@ -322,6 +325,11 @@ void writeLineBreakNoAutoIndent() {
     writeNewline(NO_AUTO_INDENT);
   }
 
+  void writeMarkdownHardLineBreak() {
+    writeLiteral(BACKSLASH_LITERAL); // Keep the '\' marker in the output so the break stays hard.
+    writeNewline(); // End the output line here instead of letting the text be re-wrapped.
+  }
+
   void writeLiteral(Literal token) {
     writeToken(token);
   }
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java
@@ -152,6 +152,9 @@ record Whitespace(String value) implements Token {}
    */
   record ForcedNewline(String value) implements Token {}
 
+  /** A Markdown hard line break: {@code \} ending a line, optionally followed by spaces/tabs. */
+  record MarkdownHardLineBreak(String value) implements Token {}
+
   /**
    * Token that permits but does not force a line break. The way that we accomplish this is somewhat
    * indirect: As far as {@link JavadocWriter} is concerned, this token is meaningless. But its mere
diff --git a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
@@ -1778,22 +1778,42 @@ class Test {}
   @Test
   public void markdownBackslashes() {
     assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
+    // We write `╲` (a box drawing character) instead of `\\` here and then substitute. That makes
+    // the test case a bit easier to read and also means that we can see where the line wrapping
+    // should happen. (Having to write \\ instead of \ would make the source text lines wider than
+    // the strings they represent.)
+    @SuppressWarnings("MisleadingEscapedSpace")
     String input =
-        """
-        /// \\<br> is not a break.
-        /// \\&#42; is not an HTML entity.
-        /// foo\\
-        /// bar
-        class Test {}
-        """;
-    // TODO: the <br> should not cause a line break, and the end-of-line backslash should.
-    // I don't think anything changes if we do or do not respect the \& backslash.
+"""
+/// ╲<br> is not a break.
+/// ╲&#42; is not an HTML entity.
+/// Backslash does not escape the end of a `code span╲` so <br> is a real break,
+/// but backslash does escape the *start* of a ╲`code span so <br> is also a real break.
+/// hard╲
+/// line╲\t\s
+/// breaks
+/// - foo ╲
+///     bar
+/// ╲@param not a param tag
+/// ╲╲@param not a param tag either
+class Test {}
+"""
+            .replace('╲', '\\');
+    // I don't think anything changes if we do or do not respect the \& backslash so nothing here
+    // proves whether we do.
     String expected =
-        """
-        /// \\<br>
-        /// is not a break. \\&#42; is not an HTML entity. foo\\ bar
-        class Test {}
-        """;
+"""
+/// ╲<br> is not a break. ╲&#42; is not an HTML entity. Backslash does not escape the end of a `code
+/// span╲` so <br>
+/// is a real break, but backslash does escape the *start* of a ╲`code span so <br>
+/// is also a real break. hard╲
+/// line╲
+/// breaks
+/// - foo ╲
+///   bar ╲@param not a param tag ╲╲@param not a param tag either
+class Test {}
+"""
+            .replace('╲', '\\');
     doFormatTest(input, expected);
   }
 
@@ -1976,14 +1996,6 @@ class Test {}
   // <pre> handling elsewhere. On the other hand, if we don't handle Markdown code spans (`...`)
   // correctly then we might incorrectly recognize HTML tags like `<ul>` inside them.
   //
-  // - Backslashes
-  //   - \<br> is not a break.
-  //   - \&#42; is not an HTML entity.
-  //   - \⏎ is a hard line break. https://spec.commonmark.org/0.31.2/#hard-line-break
-  //     A hard line break can also be written as two or more spaces followed by a newline. I think
-  //     that is ridiculous and it is absolutely fine to destroy those spaces. However the line
-  //     break will show up in the CommonMark parse.
-  //
   // - Thematic breaks: ---, ***, ___, which are all rendered as <hr> and should presumably have a
   //   line break before and after. https://spec.commonmark.org/0.31.2/#thematic-breaks
   //