Skip to content

Commit 565cf5a

Browse files
committed
Optimize documentation lints **a lot** (1/2)
Turns out that `doc_markdown` uses a non-cheap rustdoc function to convert from markdown ranges into source spans. And it was using it a lot (about once every 18 lines of documentation on `tokio`, which ends up being about 1800 times). This ended up being about 18% of the total Clippy runtime as discovered by lintcheck --perf in docs-heavy crates. This PR optimizes one of the cases in which Clippy calls the function, and a future PR once pulldown-cmark/pulldown-cmark issue number 1034 is merged will be open. Note that not all crates were affected by this crate equally, those with more docs are affected far more than those light ones.
1 parent 34f81f9 commit 565cf5a

File tree

2 files changed

+60
-19
lines changed

2 files changed

+60
-19
lines changed

clippy_lints/src/doc/markdown.rs

+55-15
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@ use rustc_lint::LateContext;
66
use rustc_span::{BytePos, Pos, Span};
77
use url::Url;
88

9-
use crate::doc::DOC_MARKDOWN;
9+
use crate::doc::{DOC_MARKDOWN, Fragments};
10+
use std::ops::{ControlFlow, Range};
1011

1112
pub fn check(
1213
cx: &LateContext<'_>,
1314
valid_idents: &FxHashSet<String>,
1415
text: &str,
15-
span: Span,
16+
fragments: &Fragments<'_>,
17+
fragment_range: Range<usize>,
1618
code_level: isize,
1719
blockquote_level: isize,
1820
) {
@@ -64,23 +66,38 @@ pub fn check(
6466
close_parens += 1;
6567
}
6668

67-
// Adjust for the current word
68-
let offset = word.as_ptr() as usize - text.as_ptr() as usize;
69-
let span = Span::new(
70-
span.lo() + BytePos::from_usize(offset),
71-
span.lo() + BytePos::from_usize(offset + word.len()),
72-
span.ctxt(),
73-
span.parent(),
74-
);
69+
// We'll use this offset to calculate the span to lint.
70+
let fragment_offset = word.as_ptr() as usize - text.as_ptr() as usize;
7571

76-
check_word(cx, word, span, code_level, blockquote_level);
72+
// Adjust for the current word
73+
if check_word(
74+
cx,
75+
word,
76+
fragments,
77+
&fragment_range,
78+
fragment_offset,
79+
code_level,
80+
blockquote_level,
81+
)
82+
.is_break()
83+
{
84+
return;
85+
}
7786
}
7887
}
7988

80-
fn check_word(cx: &LateContext<'_>, word: &str, span: Span, code_level: isize, blockquote_level: isize) {
89+
fn check_word(
90+
cx: &LateContext<'_>,
91+
word: &str,
92+
fragments: &Fragments<'_>,
93+
range: &Range<usize>,
94+
fragment_offset: usize,
95+
code_level: isize,
96+
blockquote_level: isize,
97+
) -> ControlFlow<()> {
8198
/// Checks if a string is upper-camel-case, i.e., starts with an uppercase and
8299
/// contains at least two uppercase letters (`Clippy` is ok) and one lower-case
83-
/// letter (`NASA` is ok).
100+
/// letter (`NASA` is ok).[
84101
/// Plurals are also excluded (`IDs` is ok).
85102
fn is_camel_case(s: &str) -> bool {
86103
if s.starts_with(|c: char| c.is_ascii_digit() | c.is_ascii_lowercase()) {
@@ -117,6 +134,17 @@ fn check_word(cx: &LateContext<'_>, word: &str, span: Span, code_level: isize, b
117134
// try to get around the fact that `foo::bar` parses as a valid URL
118135
&& !url.cannot_be_a_base()
119136
{
137+
let Some(fragment_span) = fragments.span(cx, range.clone()) else {
138+
return ControlFlow::Break(());
139+
};
140+
141+
let span = Span::new(
142+
fragment_span.lo() + BytePos::from_usize(fragment_offset),
143+
fragment_span.lo() + BytePos::from_usize(fragment_offset + word.len()),
144+
fragment_span.ctxt(),
145+
fragment_span.parent(),
146+
);
147+
120148
span_lint_and_sugg(
121149
cx,
122150
DOC_MARKDOWN,
@@ -126,17 +154,28 @@ fn check_word(cx: &LateContext<'_>, word: &str, span: Span, code_level: isize, b
126154
format!("<{word}>"),
127155
Applicability::MachineApplicable,
128156
);
129-
return;
157+
return ControlFlow::Continue(());
130158
}
131159

132160
// We assume that mixed-case words are not meant to be put inside backticks. (Issue #2343)
133161
//
134162
// We also assume that backticks are not necessary if inside a quote. (Issue #10262)
135163
if code_level > 0 || blockquote_level > 0 || (has_underscore(word) && has_hyphen(word)) {
136-
return;
164+
return ControlFlow::Break(());
137165
}
138166

139167
if has_underscore(word) || word.contains("::") || is_camel_case(word) || word.ends_with("()") {
168+
let Some(fragment_span) = fragments.span(cx, range.clone()) else {
169+
return ControlFlow::Break(());
170+
};
171+
172+
let span = Span::new(
173+
fragment_span.lo() + BytePos::from_usize(fragment_offset),
174+
fragment_span.lo() + BytePos::from_usize(fragment_offset + word.len()),
175+
fragment_span.ctxt(),
176+
fragment_span.parent(),
177+
);
178+
140179
span_lint_and_then(
141180
cx,
142181
DOC_MARKDOWN,
@@ -149,4 +188,5 @@ fn check_word(cx: &LateContext<'_>, word: &str, span: Span, code_level: isize, b
149188
},
150189
);
151190
}
191+
ControlFlow::Continue(())
152192
}

clippy_lints/src/doc/mod.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,10 @@ struct Fragments<'a> {
730730
}
731731

732732
impl Fragments<'_> {
733-
fn span(self, cx: &LateContext<'_>, range: Range<usize>) -> Option<Span> {
733+
/// get the span for the markdown range. Note that this function is not cheap, use it with
734+
/// caution.
735+
#[must_use]
736+
fn span(&self, cx: &LateContext<'_>, range: Range<usize>) -> Option<Span> {
734737
source_span_for_markdown_range(cx.tcx, self.doc, &range, self.fragments)
735738
}
736739
}
@@ -1068,9 +1071,7 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
10681071
);
10691072
} else {
10701073
for (text, range, assoc_code_level) in text_to_check {
1071-
if let Some(span) = fragments.span(cx, range) {
1072-
markdown::check(cx, valid_idents, &text, span, assoc_code_level, blockquote_level);
1073-
}
1074+
markdown::check(cx, valid_idents, &text, &fragments, range, assoc_code_level, blockquote_level);
10741075
}
10751076
}
10761077
text_to_check = Vec::new();

0 commit comments

Comments
 (0)