Skip to content

Commit 3bd603b

Browse files
authored
Rollup merge of #146195 - nixxo:urlencoding-fix, r=ehuss
fix partial urlencoded link support Hello Rust community. This is my first contribution; I hope it is useful. While translating the Rust book into Italian (https://github.com/nixxo/rust-lang-book-it), I noticed that the linkchecker tool was failing, reporting broken links on some pages even though the links worked properly in the browser. Upon inspection, I found that mdbook URL-encodes the links but does not URL-encode the heading IDs, resulting in a non-identical anchor/ID pairing that linkchecker reports as invalid. Looking at the source code of the linkchecker tool, I noticed that URL encoding was done by the `small_url_encode` function only partially, as the name suggests. Replacing this partial encoding with full URL encoding fixes the issue, and the links are now properly reported as valid. - added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs - added tests. Urlencoding is provided by https://crates.io/crates/urlencoding
2 parents c5a62b8 + 8b58777 commit 3bd603b

File tree

6 files changed

+23
-12
lines changed

6 files changed

+23
-12
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2167,6 +2167,7 @@ version = "0.1.0"
21672167
dependencies = [
21682168
"html5ever",
21692169
"regex",
2170+
"urlencoding",
21702171
]
21712172

21722173
[[package]]
@@ -5824,6 +5825,12 @@ dependencies = [
58245825
"percent-encoding",
58255826
]
58265827

5828+
[[package]]
5829+
name = "urlencoding"
5830+
version = "2.1.3"
5831+
source = "registry+https://github.com/rust-lang/crates.io-index"
5832+
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
5833+
58275834
[[package]]
58285835
name = "utf-8"
58295836
version = "0.7.6"

src/tools/linkchecker/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ path = "main.rs"
1010
[dependencies]
1111
regex = "1"
1212
html5ever = "0.29.0"
13+
urlencoding = "2.1.3"

src/tools/linkchecker/main.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -232,18 +232,7 @@ enum FileEntry {
232232
type Cache = HashMap<String, FileEntry>;
233233

234234
fn small_url_encode(s: &str) -> String {
235-
s.replace('<', "%3C")
236-
.replace('>', "%3E")
237-
.replace(' ', "%20")
238-
.replace('?', "%3F")
239-
.replace('\'', "%27")
240-
.replace('&', "%26")
241-
.replace(',', "%2C")
242-
.replace(':', "%3A")
243-
.replace(';', "%3B")
244-
.replace('[', "%5B")
245-
.replace(']', "%5D")
246-
.replace('\"', "%22")
235+
urlencoding::encode(s).to_string()
247236
}
248237

249238
impl Checker {

src/tools/linkchecker/tests/valid/inner/bar.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,8 @@
33

44
<h2 id="barfrag">Bar</h2>
55

6+
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
7+
<h2 id="barfrag-è">Bar</h2>
8+
69
</body>
710
</html>

src/tools/linkchecker/tests/valid/inner/foo.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,15 @@
88
<a href="https://example.com/doesnotexist">external links not validated</a>
99
<a href="redir.html#redirfrag">Redirect</a>
1010

11+
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
12+
<a href="#localfrag-%C3%A8"></a>
13+
<a href="bar.html#barfrag-%C3%A8"></a>
14+
<a href="redir.html#redirfrag-%C3%A8"></a>
15+
1116
<h2 id="localfrag">Local</h2>
1217

18+
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
19+
<h2 id="localfrag-è">Local</h2>
20+
1321
</body>
1422
</html>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
<html>
22
<body>
33
<h2 id="redirfrag">Redir</h2>
4+
5+
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
6+
<h2 id="redirfrag-è">Redir</h2>
47
</body>
58
</html>

0 commit comments

Comments
 (0)