Skip to content

Commit d40c4f1

Browse files
committed
Align IPv4 parsing to spec
This commit aligns the IPv4 parsing steps to spec and adds the relevant WPT tests.
1 parent 6275b41 commit d40c4f1

File tree

3 files changed

+202
-32
lines changed

3 files changed

+202
-32
lines changed

url/src/host.rs

+40-30
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ impl Host<String> {
109109

110110
if domain.find(is_invalid_domain_char).is_some() {
111111
Err(ParseError::InvalidDomainCharacter)
112-
} else if let Some(address) = parse_ipv4addr(&domain)? {
112+
} else if ends_in_a_number(&domain) {
113+
let address = parse_ipv4addr(&domain)?;
113114
Ok(Host::Ipv4(address))
114115
} else {
115116
Ok(Host::Domain(domain))
@@ -264,8 +265,33 @@ fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
264265
}
265266
}
266267

268+
/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
269+
fn ends_in_a_number(input: &str) -> bool {
270+
let mut parts = input.rsplit('.');
271+
let last = parts.next().unwrap();
272+
let last = if last.is_empty() {
273+
if let Some(last) = parts.next() {
274+
last
275+
} else {
276+
return false;
277+
}
278+
} else {
279+
last
280+
};
281+
if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
282+
return true;
283+
}
284+
285+
parse_ipv4number(last).is_ok()
286+
}
287+
267288
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
289+
/// Ok(None) means the input is a valid number, but it overflows a `u32`.
268290
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
291+
if input.is_empty() {
292+
return Err(());
293+
}
294+
269295
let mut r = 10;
270296
if input.starts_with("0x") || input.starts_with("0X") {
271297
input = &input[2..];
@@ -275,10 +301,10 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
275301
r = 8;
276302
}
277303

278-
// At the moment we can't know the reason why from_str_radix fails
279-
// https://github.com/rust-lang/rust/issues/22639
280-
// So instead we check if the input looks like a real number and only return
281-
// an error when it's an overflow.
304+
if input.is_empty() {
305+
return Ok(Some(0));
306+
}
307+
282308
let valid_number = match r {
283309
8 => input.chars().all(|c| ('0'..='7').contains(&c)),
284310
10 => input.chars().all(|c| ('0'..='9').contains(&c)),
@@ -287,50 +313,34 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
287313
}),
288314
_ => false,
289315
};
290-
291316
if !valid_number {
292-
return Ok(None);
317+
return Err(());
293318
}
294319

295-
if input.is_empty() {
296-
return Ok(Some(0));
297-
}
298-
if input.starts_with('+') {
299-
return Ok(None);
300-
}
301320
match u32::from_str_radix(input, r) {
302-
Ok(number) => Ok(Some(number)),
303-
Err(_) => Err(()),
321+
Ok(num) => Ok(Some(num)),
322+
Err(_) => Ok(None), // The only possible error kind here is an integer overflow.
323+
// The validity of the chars in the input is checked above.
304324
}
305325
}
306326

307327
/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
308-
fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
309-
if input.is_empty() {
310-
return Ok(None);
311-
}
328+
fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
312329
let mut parts: Vec<&str> = input.split('.').collect();
313330
if parts.last() == Some(&"") {
314331
parts.pop();
315332
}
316333
if parts.len() > 4 {
317-
return Ok(None);
334+
return Err(ParseError::InvalidIpv4Address);
318335
}
319336
let mut numbers: Vec<u32> = Vec::new();
320-
let mut overflow = false;
321337
for part in parts {
322-
if part.is_empty() {
323-
return Ok(None);
324-
}
325338
match parse_ipv4number(part) {
326339
Ok(Some(n)) => numbers.push(n),
327-
Ok(None) => return Ok(None),
328-
Err(()) => overflow = true,
340+
Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
341+
Err(()) => return Err(ParseError::InvalidIpv4Address),
329342
};
330343
}
331-
if overflow {
332-
return Err(ParseError::InvalidIpv4Address);
333-
}
334344
let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
335345
// Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
336346
if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
@@ -342,7 +352,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
342352
for (counter, n) in numbers.iter().enumerate() {
343353
ipv4 += n << (8 * (3 - counter as u32))
344354
}
345-
Ok(Some(Ipv4Addr::from(ipv4)))
355+
Ok(Ipv4Addr::from(ipv4))
346356
}
347357

348358
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>

url/tests/unit.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ fn host() {
256256
0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344,
257257
)),
258258
);
259-
assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49"));
260259
assert_host(
261260
"http://[::]",
262261
Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)),
@@ -271,7 +270,8 @@ fn host() {
271270
);
272271
assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
273272
assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111)));
274-
assert_host("http://2..2.3", Host::Domain("2..2.3"));
273+
assert!(Url::parse("http://1.35.+33.49").is_err());
274+
assert!(Url::parse("http://2..2.3").is_err());
275275
assert!(Url::parse("http://42.0x1232131").is_err());
276276
assert!(Url::parse("http://192.168.0.257").is_err());
277277

url/tests/urltestdata.json

+160
Original file line numberDiff line numberDiff line change
@@ -7744,5 +7744,165 @@
77447744
"input": "?",
77457745
"base": null,
77467746
"failure": true
7747+
},
7748+
"Last component looks like a number, but not valid IPv4",
7749+
{
7750+
"input": "http://1.2.3.4.5",
7751+
"base": "http://other.com/",
7752+
"failure": true
7753+
},
7754+
{
7755+
"input": "http://1.2.3.4.5.",
7756+
"base": "http://other.com/",
7757+
"failure": true
7758+
},
7759+
{
7760+
"input": "http://0..0x300/",
7761+
"base": "about:blank",
7762+
"failure": true
7763+
},
7764+
{
7765+
"input": "http://0..0x300./",
7766+
"base": "about:blank",
7767+
"failure": true
7768+
},
7769+
{
7770+
"input": "http://256.256.256.256.256",
7771+
"base": "http://other.com/",
7772+
"failure": true
7773+
},
7774+
{
7775+
"input": "http://256.256.256.256.256.",
7776+
"base": "http://other.com/",
7777+
"failure": true
7778+
},
7779+
{
7780+
"input": "http://1.2.3.08",
7781+
"base": "about:blank",
7782+
"failure": true
7783+
},
7784+
{
7785+
"input": "http://1.2.3.08.",
7786+
"base": "about:blank",
7787+
"failure": true
7788+
},
7789+
{
7790+
"input": "http://1.2.3.09",
7791+
"base": "about:blank",
7792+
"failure": true
7793+
},
7794+
{
7795+
"input": "http://09.2.3.4",
7796+
"base": "about:blank",
7797+
"failure": true
7798+
},
7799+
{
7800+
"input": "http://09.2.3.4.",
7801+
"base": "about:blank",
7802+
"failure": true
7803+
},
7804+
{
7805+
"input": "http://01.2.3.4.5",
7806+
"base": "about:blank",
7807+
"failure": true
7808+
},
7809+
{
7810+
"input": "http://01.2.3.4.5.",
7811+
"base": "about:blank",
7812+
"failure": true
7813+
},
7814+
{
7815+
"input": "http://0x100.2.3.4",
7816+
"base": "about:blank",
7817+
"failure": true
7818+
},
7819+
{
7820+
"input": "http://0x100.2.3.4.",
7821+
"base": "about:blank",
7822+
"failure": true
7823+
},
7824+
{
7825+
"input": "http://0x1.2.3.4.5",
7826+
"base": "about:blank",
7827+
"failure": true
7828+
},
7829+
{
7830+
"input": "http://0x1.2.3.4.5.",
7831+
"base": "about:blank",
7832+
"failure": true
7833+
},
7834+
{
7835+
"input": "http://foo.1.2.3.4",
7836+
"base": "about:blank",
7837+
"failure": true
7838+
},
7839+
{
7840+
"input": "http://foo.1.2.3.4.",
7841+
"base": "about:blank",
7842+
"failure": true
7843+
},
7844+
{
7845+
"input": "http://foo.2.3.4",
7846+
"base": "about:blank",
7847+
"failure": true
7848+
},
7849+
{
7850+
"input": "http://foo.2.3.4.",
7851+
"base": "about:blank",
7852+
"failure": true
7853+
},
7854+
{
7855+
"input": "http://foo.09",
7856+
"base": "about:blank",
7857+
"failure": true
7858+
},
7859+
{
7860+
"input": "http://foo.09.",
7861+
"base": "about:blank",
7862+
"failure": true
7863+
},
7864+
{
7865+
"input": "http://foo.0x4",
7866+
"base": "about:blank",
7867+
"failure": true
7868+
},
7869+
{
7870+
"input": "http://foo.0x4.",
7871+
"base": "about:blank",
7872+
"failure": true
7873+
},
7874+
{
7875+
"input": "http://foo.09..",
7876+
"base": "about:blank",
7877+
"hash": "",
7878+
"host": "foo.09..",
7879+
"hostname": "foo.09..",
7880+
"href":"http://foo.09../",
7881+
"password": "",
7882+
"pathname": "/",
7883+
"port":"",
7884+
"protocol": "http:",
7885+
"search": "",
7886+
"username": ""
7887+
},
7888+
{
7889+
"input": "http://0999999999999999999/",
7890+
"base": "about:blank",
7891+
"failure": true
7892+
},
7893+
{
7894+
"input": "http://foo.0x",
7895+
"base": "about:blank",
7896+
"failure": true
7897+
},
7898+
{
7899+
"input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123",
7900+
"base": "about:blank",
7901+
"failure": true
7902+
},
7903+
{
7904+
"input": "http://💩.123/",
7905+
"base": "about:blank",
7906+
"failure": true
77477907
}
77487908
]

0 commit comments

Comments
 (0)