Skip to content

Commit ebe9da7

Browse files
eamonnmcmanus authored and google-java-format Team committed
Add support for backslash in Markdown Javadoc.
PiperOrigin-RevId: 904623664
1 parent e10f310 commit ebe9da7

5 files changed

Lines changed: 63 additions & 22 deletions

File tree

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
4444
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
4545
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
46+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak;
4647
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
4748
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
4849
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
@@ -133,6 +134,7 @@ private static String render(List<Token> input, int blockIndent, boolean classic
133134
case BrTag t -> output.writeBr(standardizeBrToken(t));
134135
case Whitespace unused -> output.requestWhitespace();
135136
case ForcedNewline unused -> output.writeLineBreakNoAutoIndent();
137+
case MarkdownHardLineBreak unused -> output.writeMarkdownHardLineBreak();
136138
case Literal t -> output.writeLiteral(t);
137139
case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
138140
case ListItemCloseTag unused -> {}

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
import com.google.googlejavaformat.java.javadoc.Token.Literal;
4747
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
4848
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
49+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak;
4950
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
5051
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
5152
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
@@ -238,9 +239,22 @@ private Function<String, Token> consumeToken() throws LexException {
238239
// remaining characters being matched *could* be those things, so the regex stops at
239240
// whitespace or a backtick. The *first* character could be a backtick, in constructs like
240241
// `` `foo` ``, where the backticks adjacent to "foo" are part of the text of the code span.
242+
//
243+
// Backslash has no special meaning inside `...` so this code precedes the backslash code.
241244
verify(input.tryConsumeRegex(WORD_IN_CODE_SPAN_PATTERN));
242245
return Literal::new;
243246
}
247+
if (!classicJavadoc) {
248+
// Markdown backslash handling. \ at end of line, optionally followed by whitespace, is a hard
249+
// line break. \ elsewhere cancels any special meaning of the following character.
250+
if (input.tryConsumeRegex(MARKDOWN_HARD_LINE_BREAK_PATTERN)) {
251+
somethingSinceNewline = false;
252+
return MarkdownHardLineBreak::new;
253+
} else if (input.tryConsumeRegex(BACKSLASH_PLUS_CHARACTER_PATTERN)) {
254+
somethingSinceNewline = true;
255+
return Literal::new;
256+
}
257+
}
244258

245259
/*
246260
* TODO(cpovirk): Maybe try to detect things like "{@code\n@GwtCompatible}" that aren't intended
@@ -661,6 +675,8 @@ private static boolean hasMultipleNewlines(String s) {
661675
private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b");
662676
private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*");
663677
private static final Pattern WORD_IN_CODE_SPAN_PATTERN = compile(".[^ \t\n`]*");
678+
private static final Pattern MARKDOWN_HARD_LINE_BREAK_PATTERN = compile("\\\\[ \t]*\n");
679+
private static final Pattern BACKSLASH_PLUS_CHARACTER_PATTERN = compile("\\\\.");
664680

665681
/*
666682
* We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo<p>" (~400

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353
* are we inside?"
5454
*/
5555
final class JavadocWriter {
56+
57+
private static final Literal BACKSLASH_LITERAL = new Literal("\\");
58+
5659
private final int blockIndent;
5760
private final boolean classicJavadoc;
5861
private final StringBuilder output = new StringBuilder();
@@ -322,6 +325,11 @@ void writeLineBreakNoAutoIndent() {
322325
writeNewline(NO_AUTO_INDENT);
323326
}
324327

328+
void writeMarkdownHardLineBreak() {
329+
writeLiteral(BACKSLASH_LITERAL);
330+
writeNewline();
331+
}
332+
325333
void writeLiteral(Literal token) {
326334
writeToken(token);
327335
}

core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ record Whitespace(String value) implements Token {}
152152
*/
153153
record ForcedNewline(String value) implements Token {}
154154

155+
/** A Markdown hard line break ({@code \} at the end of a line). */
156+
record MarkdownHardLineBreak(String value) implements Token {}
157+
155158
/**
156159
* Token that permits but does not force a line break. The way that we accomplish this is somewhat
157160
* indirect: As far as {@link JavadocWriter} is concerned, this token is meaningless. But its mere

core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java

Lines changed: 34 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1778,22 +1778,42 @@ class Test {}
17781778
@Test
17791779
public void markdownBackslashes() {
17801780
assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
1781+
// We write `╲` (a box drawing character) instead of `\\` here and then substitute. That makes
1782+
// the test case a bit easier to read and also means that we can see where the line wrapping
1783+
// should happen. (Having to write \\ instead of \ would make the source text lines wider than
1784+
// the strings they represent.)
1785+
@SuppressWarnings("MisleadingEscapedSpace")
17811786
String input =
1782-
"""
1783-
/// \\<br> is not a break.
1784-
/// \\&#42; is not an HTML entity.
1785-
/// foo\\
1786-
/// bar
1787-
class Test {}
1788-
""";
1789-
// TODO: the <br> should not cause a line break, and the end-of-line backslash should.
1790-
// I don't think anything changes if we do or do not respect the \& backslash.
1787+
"""
1788+
/// ╲<br> is not a break.
1789+
/// ╲&#42; is not an HTML entity.
1790+
/// Backslash does not escape the end of a `code span╲` so <br> is a real break,
1791+
/// but backslash does escape the *start* of a ╲`code span so <br> is also a real break.
1792+
/// hard╲
1793+
/// line╲\t\s
1794+
/// breaks
1795+
/// - foo ╲
1796+
/// bar
1797+
/// ╲@param not a param tag
1798+
/// ╲╲@param not a param tag either
1799+
class Test {}
1800+
"""
1801+
.replace('╲', '\\');
1802+
// I don't think anything changes if we do or do not respect the \& backslash so nothing here
1803+
// proves whether we do.
17911804
String expected =
1792-
"""
1793-
/// \\<br>
1794-
/// is not a break. \\&#42; is not an HTML entity. foo\\ bar
1795-
class Test {}
1796-
""";
1805+
"""
1806+
/// ╲<br> is not a break. ╲&#42; is not an HTML entity. Backslash does not escape the end of a `code
1807+
/// span╲` so <br>
1808+
/// is a real break, but backslash does escape the *start* of a ╲`code span so <br>
1809+
/// is also a real break. hard╲
1810+
/// line╲
1811+
/// breaks
1812+
/// - foo ╲
1813+
/// bar ╲@param not a param tag ╲╲@param not a param tag either
1814+
class Test {}
1815+
"""
1816+
.replace('╲', '\\');
17971817
doFormatTest(input, expected);
17981818
}
17991819

@@ -1976,14 +1996,6 @@ class Test {}
19761996
// <pre> handling elsewhere. On the other hand, if we don't handle Markdown code spans (`...`)
19771997
// correctly then we might incorrectly recognize HTML tags like `<ul>` inside them.
19781998
//
1979-
// - Backslashes
1980-
// - \<br> is not a break.
1981-
// - \&#42; is not an HTML entity.
1982-
// - \⏎ is a hard line break. https://spec.commonmark.org/0.31.2/#hard-line-break
1983-
// A hard line break can also be written as two or more spaces followed by a newline. I think
1984-
// that is ridiculous and it is absolutely fine to destroy those spaces. However the line
1985-
// break will show up in the CommonMark parse.
1986-
//
19871999
// - Thematic breaks: ---, ***, ___, which are all rendered as <hr> and should presumably have a
19882000
// line break before and after. https://spec.commonmark.org/0.31.2/#thematic-breaks
19892001
//

0 commit comments

Comments
 (0)