Skip to content

Commit 6ddebe1

Browse files
authored
Merge branch 'main' into perf_heap_allocation_parse_host
2 parents a18ac4c + 968e862 commit 6ddebe1

File tree

6 files changed

+156
-33
lines changed

6 files changed

+156
-33
lines changed

LICENSE-MIT

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2013-2022 The rust-url developers
1+
Copyright (c) 2013-2025 The rust-url developers
22

33
Permission is hereby granted, free of charge, to any
44
person obtaining a copy of this software and associated

url/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ wasm-bindgen-test = "0.3"
2828
form_urlencoded = { version = "1.2.1", path = "../form_urlencoded", default-features = false, features = ["alloc"] }
2929
idna = { version = "1.0.3", path = "../idna", default-features = false, features = ["alloc", "compiled_data"] }
3030
percent-encoding = { version = "2.3.1", path = "../percent_encoding", default-features = false, features = ["alloc"] }
31-
serde = { version = "1.0", optional = true, features = ["derive"] }
31+
serde = { version = "1.0", optional = true, features = ["derive"], default-features = false }
3232

3333
[features]
3434
default = ["std"]

url/benches/parse_url.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,19 @@ fn punycode_rtl(bench: &mut Bencher) {
9696
bench.iter(|| black_box(url).parse::<Url>().unwrap());
9797
}
9898

99+
fn url_to_file_path(bench: &mut Bencher) {
100+
let url = if cfg!(windows) {
101+
"file:///C:/dir/next_dir/sub_sub_dir/testing/testing.json"
102+
} else {
103+
"file:///data/dir/next_dir/sub_sub_dir/testing/testing.json"
104+
};
105+
let url = url.parse::<Url>().unwrap();
106+
107+
bench.iter(|| {
108+
black_box(url.to_file_path().unwrap());
109+
});
110+
}
111+
99112
benchmark_group!(
100113
benches,
101114
short,
@@ -111,5 +124,6 @@ benchmark_group!(
111124
punycode_ltr,
112125
unicode_rtl,
113126
punycode_rtl,
127+
url_to_file_path
114128
);
115129
benchmark_main!(benches);

url/src/lib.rs

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,11 @@ impl Url {
445445
/// let url = base.join("//eve.com/b")?;
446446
/// assert_eq!(url.as_str(), "https://eve.com/b");
447447
///
448+
/// // Input as a path-absolute URL, relative to a special-scheme base URL
449+
/// let base = Url::parse("https://alice.com/a")?;
450+
/// let url = base.join("/v1/meta")?;
451+
/// assert_eq!(url.as_str(), "https://alice.com/v1/meta");
452+
///
448453
/// // Input as absolute URL
449454
/// let base = Url::parse("https://alice.com/a")?;
450455
/// let url = base.join("http://eve.com/b")?;
@@ -2721,7 +2726,26 @@ impl Url {
27212726
_ => return Err(()),
27222727
};
27232728

2724-
return file_url_segments_to_pathbuf(host, segments);
2729+
let str_len = self.as_str().len();
2730+
let estimated_capacity = if cfg!(target_os = "redox") {
2731+
let scheme_len = self.scheme().len();
2732+
let file_scheme_len = "file".len();
2733+
// remove only // because it still has file:
2734+
if scheme_len < file_scheme_len {
2735+
let scheme_diff = file_scheme_len - scheme_len;
2736+
(str_len + scheme_diff).saturating_sub(2)
2737+
} else {
2738+
let scheme_diff = scheme_len - file_scheme_len;
2739+
str_len.saturating_sub(scheme_diff + 2)
2740+
}
2741+
} else if cfg!(windows) {
2742+
// remove scheme: - has possible \\ for hostname
2743+
str_len.saturating_sub(self.scheme().len() + 1)
2744+
} else {
2745+
// remove scheme://
2746+
str_len.saturating_sub(self.scheme().len() + 3)
2747+
};
2748+
return file_url_segments_to_pathbuf(estimated_capacity, host, segments);
27252749
}
27262750
Err(())
27272751
}
@@ -3031,6 +3055,7 @@ fn path_to_file_url_segments_windows(
30313055
any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
30323056
))]
30333057
fn file_url_segments_to_pathbuf(
3058+
estimated_capacity: usize,
30343059
host: Option<&str>,
30353060
segments: str::Split<'_, char>,
30363061
) -> Result<PathBuf, ()> {
@@ -3042,17 +3067,16 @@ fn file_url_segments_to_pathbuf(
30423067
use std::os::hermit::ffi::OsStrExt;
30433068
#[cfg(any(unix, target_os = "redox"))]
30443069
use std::os::unix::prelude::OsStrExt;
3045-
use std::path::PathBuf;
30463070

30473071
if host.is_some() {
30483072
return Err(());
30493073
}
30503074

3051-
let mut bytes = if cfg!(target_os = "redox") {
3052-
b"file:".to_vec()
3053-
} else {
3054-
Vec::new()
3055-
};
3075+
let mut bytes = Vec::new();
3076+
bytes.try_reserve(estimated_capacity).map_err(|_| ())?;
3077+
if cfg!(target_os = "redox") {
3078+
bytes.extend(b"file:");
3079+
}
30563080

30573081
for segment in segments {
30583082
bytes.push(b'/');
@@ -3084,22 +3108,27 @@ fn file_url_segments_to_pathbuf(
30843108

30853109
#[cfg(all(feature = "std", windows))]
30863110
fn file_url_segments_to_pathbuf(
3111+
estimated_capacity: usize,
30873112
host: Option<&str>,
30883113
segments: str::Split<char>,
30893114
) -> Result<PathBuf, ()> {
3090-
file_url_segments_to_pathbuf_windows(host, segments)
3115+
file_url_segments_to_pathbuf_windows(estimated_capacity, host, segments)
30913116
}
30923117

30933118
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
30943119
#[cfg(feature = "std")]
30953120
#[cfg_attr(not(windows), allow(dead_code))]
30963121
fn file_url_segments_to_pathbuf_windows(
3122+
estimated_capacity: usize,
30973123
host: Option<&str>,
30983124
mut segments: str::Split<'_, char>,
30993125
) -> Result<PathBuf, ()> {
3100-
use percent_encoding::percent_decode;
3101-
let mut string = if let Some(host) = host {
3102-
r"\\".to_owned() + host
3126+
use percent_encoding::percent_decode_str;
3127+
let mut string = String::new();
3128+
string.try_reserve(estimated_capacity).map_err(|_| ())?;
3129+
if let Some(host) = host {
3130+
string.push_str(r"\\");
3131+
string.push_str(host);
31033132
} else {
31043133
let first = segments.next().ok_or(())?;
31053134

@@ -3109,7 +3138,7 @@ fn file_url_segments_to_pathbuf_windows(
31093138
return Err(());
31103139
}
31113140

3112-
first.to_owned()
3141+
string.push_str(first);
31133142
}
31143143

31153144
4 => {
@@ -3121,7 +3150,8 @@ fn file_url_segments_to_pathbuf_windows(
31213150
return Err(());
31223151
}
31233152

3124-
first[0..1].to_owned() + ":"
3153+
string.push_str(&first[0..1]);
3154+
string.push(':');
31253155
}
31263156

31273157
_ => return Err(()),
@@ -3132,11 +3162,20 @@ fn file_url_segments_to_pathbuf_windows(
31323162
string.push('\\');
31333163

31343164
// Currently non-unicode windows paths cannot be represented
3135-
match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
3165+
match percent_decode_str(segment).decode_utf8() {
31363166
Ok(s) => string.push_str(&s),
31373167
Err(..) => return Err(()),
31383168
}
31393169
}
3170+
// ensure our estimated capacity was good
3171+
if cfg!(test) {
3172+
debug_assert!(
3173+
string.len() <= estimated_capacity,
3174+
"len: {}, capacity: {}",
3175+
string.len(),
3176+
estimated_capacity
3177+
);
3178+
}
31403179
let path = PathBuf::from(string);
31413180
debug_assert!(
31423181
path.is_absolute(),

url/src/parser.rs

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -402,15 +402,15 @@ impl<'a> Parser<'a> {
402402
}
403403

404404
pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
405-
if input.is_empty() || !input.starts_with(ascii_alpha) {
405+
// starts_with will also fail for empty strings so we can skip that comparison for perf
406+
if !input.starts_with(ascii_alpha) {
406407
return Err(());
407408
}
408409
debug_assert!(self.serialization.is_empty());
409410
while let Some(c) = input.next() {
410411
match c {
411-
'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => {
412-
self.serialization.push(c.to_ascii_lowercase())
413-
}
412+
'a'..='z' | '0'..='9' | '+' | '-' | '.' => self.serialization.push(c),
413+
'A'..='Z' => self.serialization.push(c.to_ascii_lowercase()),
414414
':' => return Ok(input),
415415
_ => {
416416
self.serialization.clear();
@@ -1193,32 +1193,96 @@ impl<'a> Parser<'a> {
11931193
path_start: usize,
11941194
mut input: Input<'i>,
11951195
) -> Input<'i> {
1196+
// it's much faster to call utf8_percent_encode in bulk
1197+
fn push_pending(
1198+
serialization: &mut String,
1199+
start_str: &str,
1200+
remaining_len: usize,
1201+
context: Context,
1202+
scheme_type: SchemeType,
1203+
) {
1204+
let text = &start_str[..start_str.len() - remaining_len];
1205+
if text.is_empty() {
1206+
return;
1207+
}
1208+
if context == Context::PathSegmentSetter {
1209+
if scheme_type.is_special() {
1210+
serialization.extend(utf8_percent_encode(text, SPECIAL_PATH_SEGMENT));
1211+
} else {
1212+
serialization.extend(utf8_percent_encode(text, PATH_SEGMENT));
1213+
}
1214+
} else {
1215+
serialization.extend(utf8_percent_encode(text, PATH));
1216+
}
1217+
}
1218+
11961219
// Relative path state
11971220
loop {
11981221
let mut segment_start = self.serialization.len();
11991222
let mut ends_with_slash = false;
1223+
let mut start_str = input.chars.as_str();
12001224
loop {
12011225
let input_before_c = input.clone();
1202-
let (c, utf8_c) = if let Some(x) = input.next_utf8() {
1203-
x
1226+
// bypass input.next() and manually handle ascii_tab_or_new_line
1227+
// in order to encode string slices in bulk
1228+
let c = if let Some(c) = input.chars.next() {
1229+
c
12041230
} else {
1231+
push_pending(
1232+
&mut self.serialization,
1233+
start_str,
1234+
0,
1235+
self.context,
1236+
scheme_type,
1237+
);
12051238
break;
12061239
};
12071240
match c {
1241+
ascii_tab_or_new_line_pattern!() => {
1242+
push_pending(
1243+
&mut self.serialization,
1244+
start_str,
1245+
input_before_c.chars.as_str().len(),
1246+
self.context,
1247+
scheme_type,
1248+
);
1249+
start_str = input.chars.as_str();
1250+
}
12081251
'/' if self.context != Context::PathSegmentSetter => {
1252+
push_pending(
1253+
&mut self.serialization,
1254+
start_str,
1255+
input_before_c.chars.as_str().len(),
1256+
self.context,
1257+
scheme_type,
1258+
);
12091259
self.serialization.push(c);
12101260
ends_with_slash = true;
12111261
break;
12121262
}
12131263
'\\' if self.context != Context::PathSegmentSetter
12141264
&& scheme_type.is_special() =>
12151265
{
1266+
push_pending(
1267+
&mut self.serialization,
1268+
start_str,
1269+
input_before_c.chars.as_str().len(),
1270+
self.context,
1271+
scheme_type,
1272+
);
12161273
self.log_violation(SyntaxViolation::Backslash);
12171274
self.serialization.push('/');
12181275
ends_with_slash = true;
12191276
break;
12201277
}
12211278
'?' | '#' if self.context == Context::UrlParser => {
1279+
push_pending(
1280+
&mut self.serialization,
1281+
start_str,
1282+
input_before_c.chars.as_str().len(),
1283+
self.context,
1284+
scheme_type,
1285+
);
12221286
input = input_before_c;
12231287
break;
12241288
}
@@ -1230,23 +1294,21 @@ impl<'a> Parser<'a> {
12301294
&self.serialization[path_start + 1..],
12311295
)
12321296
{
1297+
push_pending(
1298+
&mut self.serialization,
1299+
start_str,
1300+
input_before_c.chars.as_str().len(),
1301+
self.context,
1302+
scheme_type,
1303+
);
1304+
start_str = input_before_c.chars.as_str();
12331305
self.serialization.push('/');
12341306
segment_start += 1;
12351307
}
1236-
if self.context == Context::PathSegmentSetter {
1237-
if scheme_type.is_special() {
1238-
self.serialization
1239-
.extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
1240-
} else {
1241-
self.serialization
1242-
.extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
1243-
}
1244-
} else {
1245-
self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
1246-
}
12471308
}
12481309
}
12491310
}
1311+
12501312
let segment_before_slash = if ends_with_slash {
12511313
&self.serialization[segment_start..self.serialization.len() - 1]
12521314
} else {

url/tests/unit.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,14 @@ fn test_set_scheme_to_file_with_host() {
10311031
assert_eq!(result, Err(()));
10321032
}
10331033

1034+
#[test]
1035+
fn test_set_scheme_empty_err() {
1036+
let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap();
1037+
let result = url.set_scheme("");
1038+
assert_eq!(url.to_string(), "http://localhost:6767/foo/bar");
1039+
assert_eq!(result, Err(()));
1040+
}
1041+
10341042
#[test]
10351043
fn no_panic() {
10361044
let mut url = Url::parse("arhttpsps:/.//eom/dae.com/\\\\t\\:").unwrap();

0 commit comments

Comments
 (0)